/*-
 * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "en.h"

#include <sys/sockio.h>
#include <machine/atomic.h>

#define	ETH_DRIVER_VERSION	"3.1.0-dev"
char mlx5e_version[] = "Mellanox Ethernet driver"
    " (" ETH_DRIVER_VERSION ")";

struct mlx5e_rq_param {
	u32	rqc [MLX5_ST_SZ_DW(rqc)];
	struct mlx5_wq_param wq;
};

struct mlx5e_sq_param {
	u32	sqc [MLX5_ST_SZ_DW(sqc)];
	struct mlx5_wq_param wq;
};

struct mlx5e_cq_param {
	u32	cqc [MLX5_ST_SZ_DW(cqc)];
	struct mlx5_wq_param wq;
	u16	eq_ix;
};

struct mlx5e_channel_param {
	struct mlx5e_rq_param rq;
	struct mlx5e_sq_param sq;
	struct mlx5e_cq_param rx_cq;
	struct mlx5e_cq_param tx_cq;
};

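/*
 * Table translating the mlx5 hardware link modes into ifmedia(4)
 * subtypes and nominal baudrates. Entries left at a zero baudrate
 * are skipped by the lookup loops below.
 */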
static const struct {
	u32	subtype;
	u64	baudrate;
}	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {

	[MLX5E_1000BASE_CX_SGMII] = {
		.subtype = IFM_1000_CX_SGMII,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_1000BASE_KX] = {
		.subtype = IFM_1000_KX,
		.baudrate = IF_Mbps(1000ULL),
	},
	[MLX5E_10GBASE_CX4] = {
		.subtype = IFM_10G_CX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KX4] = {
		.subtype = IFM_10G_KX4,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_KR] = {
		.subtype = IFM_10G_KR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_20GBASE_KR2] = {
		.subtype = IFM_20G_KR2,
		.baudrate = IF_Gbps(20ULL),
	},
	[MLX5E_40GBASE_CR4] = {
		.subtype = IFM_40G_CR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_KR4] = {
		.subtype = IFM_40G_KR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_56GBASE_R4] = {
		.subtype = IFM_56G_R4,
		.baudrate = IF_Gbps(56ULL),
	},
	[MLX5E_10GBASE_CR] = {
		.subtype = IFM_10G_CR1,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_SR] = {
		.subtype = IFM_10G_SR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_10GBASE_LR] = {
		.subtype = IFM_10G_LR,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_40GBASE_SR4] = {
		.subtype = IFM_40G_SR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_40GBASE_LR4] = {
		.subtype = IFM_40G_LR4,
		.baudrate = IF_Gbps(40ULL),
	},
	[MLX5E_100GBASE_CR4] = {
		.subtype = IFM_100G_CR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_SR4] = {
		.subtype = IFM_100G_SR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_KR4] = {
		.subtype = IFM_100G_KR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100GBASE_LR4] = {
		.subtype = IFM_100G_LR4,
		.baudrate = IF_Gbps(100ULL),
	},
	[MLX5E_100BASE_TX] = {
		.subtype = IFM_100_TX,
		.baudrate = IF_Mbps(100ULL),
	},
	[MLX5E_100BASE_T] = {
		.subtype = IFM_100_T,
		.baudrate = IF_Mbps(100ULL),
	},
	[MLX5E_10GBASE_T] = {
		.subtype = IFM_10G_T,
		.baudrate = IF_Gbps(10ULL),
	},
	[MLX5E_25GBASE_CR] = {
		.subtype = IFM_25G_CR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_KR] = {
		.subtype = IFM_25G_KR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_25GBASE_SR] = {
		.subtype = IFM_25G_SR,
		.baudrate = IF_Gbps(25ULL),
	},
	[MLX5E_50GBASE_CR2] = {
		.subtype = IFM_50G_CR2,
		.baudrate = IF_Gbps(50ULL),
	},
	[MLX5E_50GBASE_KR2] = {
		.subtype = IFM_50G_KR2,
		.baudrate = IF_Gbps(50ULL),
	},
};

MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");

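/*
 * Query the vport state and the operational link modes from the
 * firmware, derive the active media subtype and baudrate from
 * mlx5e_mode_table[] and report the result to the network stack
 * through if_link_state_change().
 */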
static void
mlx5e_update_carrier(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
	u32 eth_proto_oper;
	int error;
	u8 port_state;
	u8 i;

	port_state = mlx5_query_vport_state(mdev,
	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);

	if (port_state == VPORT_STATE_UP) {
		priv->media_status_last |= IFM_ACTIVE;
	} else {
		priv->media_status_last &= ~IFM_ACTIVE;
		priv->media_active_last = IFM_ETHER;
		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
		return;
	}

	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
	if (error) {
		priv->media_active_last = IFM_ETHER;
		priv->ifp->if_baudrate = 1;
		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
		    __func__, error);
		return;
	}
	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);

	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
			priv->ifp->if_baudrate =
			    mlx5e_mode_table[i].baudrate;
			priv->media_active_last =
			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
		}
	}
	if_link_state_change(priv->ifp, LINK_STATE_UP);
}

static void
mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
	struct mlx5e_priv *priv = dev->if_softc;

	ifmr->ifm_status = priv->media_status_last;
	ifmr->ifm_active = priv->media_active_last |
	    (priv->params_ethtool.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
	    (priv->params_ethtool.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
}

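/*
 * Translate an ifmedia(4) subtype back into a mask of mlx5 hardware
 * link modes. Multiple hardware modes may map to the same subtype,
 * so all matching table entries are OR'ed into the returned mask.
 */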
static u32
mlx5e_find_link_mode(u32 subtype)
{
	u32 i;
	u32 link_mode = 0;

	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
		if (mlx5e_mode_table[i].baudrate == 0)
			continue;
		if (mlx5e_mode_table[i].subtype == subtype)
			link_mode |= MLX5E_PROT_MASK(i);
	}

	return (link_mode);
}

static int
mlx5e_media_change(struct ifnet *dev)
{
	struct mlx5e_priv *priv = dev->if_softc;
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 eth_proto_cap;
	u32 link_mode;
	int locked;
	int error;

	locked = PRIV_LOCKED(priv);
	if (!locked)
		PRIV_LOCK(priv);

	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
		error = EINVAL;
		goto done;
	}
	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));

	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
	if (error) {
		if_printf(dev, "Query port media capability failed\n");
		goto done;
	}
	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO)
		link_mode = eth_proto_cap;
	else
		link_mode = link_mode & eth_proto_cap;

	if (!link_mode) {
		if_printf(dev, "Unsupported link mode requested\n");
		error = EINVAL;
		goto done;
	}
	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
	mlx5_set_port_status(mdev, MLX5_PORT_UP);

done:
	if (!locked)
		PRIV_UNLOCK(priv);
	return (error);
}

290 
291 static void
292 mlx5e_update_carrier_work(struct work_struct *work)
293 {
294 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
295 	    update_carrier_work);
296 
297 	PRIV_LOCK(priv);
298 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
299 		mlx5e_update_carrier(priv);
300 	PRIV_UNLOCK(priv);
301 }
302 
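/*
 * Read the per-port counters from the PPCNT register, one counter
 * group at a time, into the flat "arg" arrays of the pport and
 * port_stats_debug statistics structures. Note that "y" keeps
 * counting across the IEEE 802.3 and RFC 2819 groups, because both
 * are stored into the same destination array.
 */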
static void
mlx5e_update_pport_counters(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_pport_stats *s = &priv->stats.pport;
	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
	u32 *in;
	u32 *out;
	u64 *ptr;
	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
	unsigned x;
	unsigned y;

	in = mlx5_vzalloc(sz);
	out = mlx5_vzalloc(sz);
	if (in == NULL || out == NULL)
		goto free_out;

	ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);

	MLX5_SET(ppcnt_reg, in, local_port, 1);

	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);

	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
		s->arg[y] = be64toh(ptr[x]);
	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);

	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
		s_debug->arg[y] = be64toh(ptr[x]);
free_out:
	kvfree(in);
	kvfree(out);
}

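/*
 * Periodic statistics update, scheduled once per second from the
 * watchdog callout. The software per-channel counters are folded
 * into the vport statistics first; then the hardware counters are
 * queried, so that derived values like rx_packets remain consistent.
 */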
static void
mlx5e_update_stats_work(struct work_struct *work)
{
	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
	    update_stats_work);
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5e_vport_stats *s = &priv->stats.vport;
	struct mlx5e_rq_stats *rq_stats;
	struct mlx5e_sq_stats *sq_stats;
	struct buf_ring *sq_br;
#if (__FreeBSD_version < 1100000)
	struct ifnet *ifp = priv->ifp;
#endif

	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
	u32 *out;
	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
	u64 tso_packets = 0;
	u64 tso_bytes = 0;
	u64 tx_queue_dropped = 0;
	u64 tx_defragged = 0;
	u64 tx_offload_none = 0;
	u64 lro_packets = 0;
	u64 lro_bytes = 0;
	u64 sw_lro_queued = 0;
	u64 sw_lro_flushed = 0;
	u64 rx_csum_none = 0;
	u64 rx_wqe_err = 0;
	u32 rx_out_of_buffer = 0;
	int i;
	int j;

	PRIV_LOCK(priv);
	out = mlx5_vzalloc(outlen);
	if (out == NULL)
		goto free_out;
	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
		goto free_out;

	/* Collect the SW counters first and then the HW counters, for consistency */
	for (i = 0; i < priv->params.num_channels; i++) {
		struct mlx5e_rq *rq = &priv->channel[i]->rq;

		rq_stats = &priv->channel[i]->rq.stats;

		/* collect stats from LRO */
		rq_stats->sw_lro_queued = rq->lro.lro_queued;
		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
		sw_lro_queued += rq_stats->sw_lro_queued;
		sw_lro_flushed += rq_stats->sw_lro_flushed;
		lro_packets += rq_stats->lro_packets;
		lro_bytes += rq_stats->lro_bytes;
		rx_csum_none += rq_stats->csum_none;
		rx_wqe_err += rq_stats->wqe_err;

		for (j = 0; j < priv->num_tc; j++) {
			sq_stats = &priv->channel[i]->sq[j].stats;
			sq_br = priv->channel[i]->sq[j].br;

			tso_packets += sq_stats->tso_packets;
			tso_bytes += sq_stats->tso_bytes;
			tx_queue_dropped += sq_stats->dropped;
			tx_queue_dropped += sq_br->br_drops;
			tx_defragged += sq_stats->defragged;
			tx_offload_none += sq_stats->csum_offload_none;
		}
	}

	/* update counters */
	s->tso_packets = tso_packets;
	s->tso_bytes = tso_bytes;
	s->tx_queue_dropped = tx_queue_dropped;
	s->tx_defragged = tx_defragged;
	s->lro_packets = lro_packets;
	s->lro_bytes = lro_bytes;
	s->sw_lro_queued = sw_lro_queued;
	s->sw_lro_flushed = sw_lro_flushed;
	s->rx_csum_none = rx_csum_none;
	s->rx_wqe_err = rx_wqe_err;

	/* HW counters */
	memset(in, 0, sizeof(in));

	MLX5_SET(query_vport_counter_in, in, opcode,
	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
	MLX5_SET(query_vport_counter_in, in, other_vport, 0);

	memset(out, 0, outlen);

	/* get number of out-of-buffer drops first */
	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
	    &rx_out_of_buffer))
		goto free_out;

	/* accumulate difference into a 64-bit counter */
	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
	s->rx_out_of_buffer_prev = rx_out_of_buffer;

	/* get port statistics */
	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
		goto free_out;

#define	MLX5_GET_CTR(out, x) \
	MLX5_GET64(query_vport_counter_out, out, x)

	s->rx_error_packets =
	    MLX5_GET_CTR(out, received_errors.packets);
	s->rx_error_bytes =
	    MLX5_GET_CTR(out, received_errors.octets);
	s->tx_error_packets =
	    MLX5_GET_CTR(out, transmit_errors.packets);
	s->tx_error_bytes =
	    MLX5_GET_CTR(out, transmit_errors.octets);

	s->rx_unicast_packets =
	    MLX5_GET_CTR(out, received_eth_unicast.packets);
	s->rx_unicast_bytes =
	    MLX5_GET_CTR(out, received_eth_unicast.octets);
	s->tx_unicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
	s->tx_unicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);

	s->rx_multicast_packets =
	    MLX5_GET_CTR(out, received_eth_multicast.packets);
	s->rx_multicast_bytes =
	    MLX5_GET_CTR(out, received_eth_multicast.octets);
	s->tx_multicast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
	s->tx_multicast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);

	s->rx_broadcast_packets =
	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
	s->rx_broadcast_bytes =
	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
	s->tx_broadcast_packets =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
	s->tx_broadcast_bytes =
	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);

	s->rx_packets =
	    s->rx_unicast_packets +
	    s->rx_multicast_packets +
	    s->rx_broadcast_packets -
	    s->rx_out_of_buffer;
	s->rx_bytes =
	    s->rx_unicast_bytes +
	    s->rx_multicast_bytes +
	    s->rx_broadcast_bytes;
	s->tx_packets =
	    s->tx_unicast_packets +
	    s->tx_multicast_packets +
	    s->tx_broadcast_packets;
	s->tx_bytes =
	    s->tx_unicast_bytes +
	    s->tx_multicast_bytes +
	    s->tx_broadcast_bytes;

	/* Update calculated offload counters */
	s->tx_csum_offload = s->tx_packets - tx_offload_none;
	s->rx_csum_good = s->rx_packets - s->rx_csum_none;

	/* Update per port counters */
	mlx5e_update_pport_counters(priv);

#if (__FreeBSD_version < 1100000)
	/* no get_counters interface in fbsd 10 */
	ifp->if_ipackets = s->rx_packets;
	ifp->if_ierrors = s->rx_error_packets;
	ifp->if_iqdrops = s->rx_out_of_buffer;
	ifp->if_opackets = s->tx_packets;
	ifp->if_oerrors = s->tx_error_packets;
	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
	ifp->if_ibytes = s->rx_bytes;
	ifp->if_obytes = s->tx_bytes;
#endif

free_out:
	kvfree(out);
	PRIV_UNLOCK(priv);
}

static void
mlx5e_update_stats(void *arg)
{
	struct mlx5e_priv *priv = arg;

	schedule_work(&priv->update_stats_work);

	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
}

static void
mlx5e_async_event_sub(struct mlx5e_priv *priv,
    enum mlx5_dev_event event)
{
	switch (event) {
	case MLX5_DEV_EVENT_PORT_UP:
	case MLX5_DEV_EVENT_PORT_DOWN:
		schedule_work(&priv->update_carrier_work);
		break;

	default:
		break;
	}
}

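/*
 * Firmware event callback registered with the mlx5 core. Dispatch
 * happens under the async events mutex so that
 * mlx5e_disable_async_events() can reliably stop delivery.
 */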
static void
mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
    enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5e_priv *priv = vpriv;

	mtx_lock(&priv->async_events_mtx);
	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
		mlx5e_async_event_sub(priv, event);
	mtx_unlock(&priv->async_events_mtx);
}

static void
mlx5e_enable_async_events(struct mlx5e_priv *priv)
{
	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
}

static void
mlx5e_disable_async_events(struct mlx5e_priv *priv)
{
	mtx_lock(&priv->async_events_mtx);
	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
	mtx_unlock(&priv->async_events_mtx);
}

static const char *mlx5e_rq_stats_desc[] = {
	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
};

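/*
 * Allocate the software state of a receive queue: a DMA tag sized
 * for the largest supported receive buffer (16KB jumbo cluster), the
 * hardware work queue, one mbuf pointer and DMA map per WQE, the
 * per-queue "rxstat" sysctl node and the (T)LRO context. The WQE
 * size is rounded up to the nearest supported mbuf cluster size.
 */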
static int
mlx5e_create_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];
	void *rqc = param->rqc;
	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
	int wq_sz;
	int err;
	int i;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MJUM16BYTES,		/* maxsize */
	    1,				/* nsegments */
	    MJUM16BYTES,		/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &rq->dma_tag)))
		goto done;

	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
	    &rq->wq_ctrl);
	if (err)
		goto err_free_dma_tag;

	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];

	if (priv->params.hw_lro_en) {
		rq->wqe_sz = priv->params.lro_wqe_sz;
	} else {
		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
	}
	if (rq->wqe_sz > MJUM16BYTES) {
		err = -ENOMEM;
		goto err_rq_wq_destroy;
	} else if (rq->wqe_sz > MJUM9BYTES) {
		rq->wqe_sz = MJUM16BYTES;
	} else if (rq->wqe_sz > MJUMPAGESIZE) {
		rq->wqe_sz = MJUM9BYTES;
	} else if (rq->wqe_sz > MCLBYTES) {
		rq->wqe_sz = MJUMPAGESIZE;
	} else {
		rq->wqe_sz = MCLBYTES;
	}

	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
	if (rq->mbuf == NULL) {
		err = -ENOMEM;
		goto err_rq_wq_destroy;
	}
	for (i = 0; i != wq_sz; i++) {
		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;

		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
		if (err != 0) {
			while (i--)
				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
			goto err_rq_mbuf_free;
		}
		wqe->data.lkey = c->mkey_be;
		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
	}

	rq->pdev = c->pdev;
	rq->ifp = c->ifp;
	rq->channel = c;
	rq->ix = c->ix;

	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
	    rq->stats.arg);

#ifdef HAVE_TURBO_LRO
	if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0)
		rq->lro.mbuf = NULL;
#else
	if (tcp_lro_init(&rq->lro))
		rq->lro.lro_cnt = 0;
	else
		rq->lro.ifp = c->ifp;
#endif
	return (0);

err_rq_mbuf_free:
	free(rq->mbuf, M_MLX5EN);
err_rq_wq_destroy:
	mlx5_wq_destroy(&rq->wq_ctrl);
err_free_dma_tag:
	bus_dma_tag_destroy(rq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_rq(struct mlx5e_rq *rq)
{
	int wq_sz;
	int i;

	/* destroy all sysctl nodes */
	sysctl_ctx_free(&rq->stats.ctx);

	/* free leftover LRO packets, if any */
#ifdef HAVE_TURBO_LRO
	tcp_tlro_free(&rq->lro);
#else
	tcp_lro_free(&rq->lro);
#endif
	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
	for (i = 0; i != wq_sz; i++) {
		if (rq->mbuf[i].mbuf != NULL) {
			bus_dmamap_unload(rq->dma_tag,
			    rq->mbuf[i].dma_map);
			m_freem(rq->mbuf[i].mbuf);
		}
		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
	}
	free(rq->mbuf, M_MLX5EN);
	mlx5_wq_destroy(&rq->wq_ctrl);
}

static int
mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
	    sizeof(u64) * rq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
	wq = MLX5_ADDR_OF(rqc, rqc, wq);

	memcpy(rqc, param->rqc, sizeof(param->rqc));

	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
	if (priv->counter_set_id >= 0)
		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&rq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *rqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);

	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
	MLX5_SET(rqc, rqc, state, next_state);

	err = mlx5_core_modify_rq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_disable_rq(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_rq(mdev, rq->rqn);
}

static int
mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
{
	struct mlx5e_channel *c = rq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_wq_ll *wq = &rq->wq;
	int i;

	for (i = 0; i < 1000; i++) {
		if (wq->cur_sz >= priv->params.min_rx_wqes)
			return (0);

		msleep(4);
	}
	return (-ETIMEDOUT);
}

static int
mlx5e_open_rq(struct mlx5e_channel *c,
    struct mlx5e_rq_param *param,
    struct mlx5e_rq *rq)
{
	int err;
	int i;

	err = mlx5e_create_rq(c, param, rq);
	if (err)
		return (err);

	err = mlx5e_enable_rq(rq, param);
	if (err)
		goto err_destroy_rq;

	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
	if (err)
		goto err_disable_rq;

	c->rq.enabled = 1;

	/*
	 * Test send queues, which will trigger
	 * "mlx5e_post_rx_wqes()":
	 */
	for (i = 0; i != c->num_tc; i++)
		mlx5e_send_nop(&c->sq[i], 1, true);
	return (0);

err_disable_rq:
	mlx5e_disable_rq(rq);
err_destroy_rq:
	mlx5e_destroy_rq(rq);

	return (err);
}

static void
mlx5e_close_rq(struct mlx5e_rq *rq)
{
	rq->enabled = 0;
	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
}

static void
mlx5e_close_rq_wait(struct mlx5e_rq *rq)
{
	/* wait till RQ is empty */
	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
		msleep(4);
		rq->cq.mcq.comp(&rq->cq.mcq);
	}

	mlx5e_disable_rq(rq);
	mlx5e_destroy_rq(rq);
}

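/*
 * The send queue "db" is the per-WQE mbuf tracking array; one busdma
 * map is created per work queue entry up front, so the transmit path
 * never has to allocate a map itself.
 */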
static void
mlx5e_free_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int x;

	for (x = 0; x != wq_sz; x++)
		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
	free(sq->mbuf, M_MLX5EN);
}

static int
mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
{
	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
	int err;
	int x;

	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
	if (sq->mbuf == NULL)
		return (-ENOMEM);

	/* Create DMA descriptor MAPs */
	for (x = 0; x != wq_sz; x++) {
		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
		if (err != 0) {
			while (x--)
				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
			free(sq->mbuf, M_MLX5EN);
			return (err);
		}
	}
	return (0);
}

static const char *mlx5e_sq_stats_desc[] = {
	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
};

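/*
 * Allocate the software state of a send queue: a multi-segment DMA
 * tag, a UAR (doorbell) mapping, the hardware work queue, the mbuf
 * tracking array, the drbr buffer ring, and a dedicated taskqueue
 * for deferred transmission. With the kernel RSS option the
 * taskqueue thread is pinned to the CPU serving this channel's RSS
 * bucket.
 */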
static int
mlx5e_create_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	char buffer[16];

	void *sqc = param->sqc;
	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
#ifdef RSS
	cpuset_t cpu_mask;
	int cpu_id;
#endif
	int err;

	/* Create DMA descriptor TAG */
	if ((err = -bus_dma_tag_create(
	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
	    1,				/* any alignment */
	    0,				/* no boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
	    0,				/* flags */
	    NULL, NULL,			/* lockfunc, lockfuncarg */
	    &sq->dma_tag)))
		goto done;

	err = mlx5_alloc_map_uar(mdev, &sq->uar);
	if (err)
		goto err_free_dma_tag;

	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
	    &sq->wq_ctrl);
	if (err)
		goto err_unmap_free_uar;

	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
	sq->uar_map = sq->uar.map;
	sq->uar_bf_map = sq->uar.bf_map;
	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;

	err = mlx5e_alloc_sq_db(sq);
	if (err)
		goto err_sq_wq_destroy;

	sq->pdev = c->pdev;
	sq->mkey_be = c->mkey_be;
	sq->channel = c;
	sq->tc = tc;

	sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
	    M_WAITOK, &sq->lock);
	if (sq->br == NULL) {
		if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
		    __func__);
		err = -ENOMEM;
		goto err_free_sq_db;
	}

	sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
	    taskqueue_thread_enqueue, &sq->sq_tq);
	if (sq->sq_tq == NULL) {
		if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
		    __func__);
		err = -ENOMEM;
		goto err_free_drbr;
	}

	TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
#ifdef RSS
	cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
	CPU_SETOF(cpu_id, &cpu_mask);
	taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
	    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
#else
	taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
	    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
#endif
	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
	    sq->stats.arg);

	return (0);

err_free_drbr:
	buf_ring_free(sq->br, M_MLX5EN);
err_free_sq_db:
	mlx5e_free_sq_db(sq);
err_sq_wq_destroy:
	mlx5_wq_destroy(&sq->wq_ctrl);

err_unmap_free_uar:
	mlx5_unmap_free_uar(mdev, &sq->uar);

err_free_dma_tag:
	bus_dma_tag_destroy(sq->dma_tag);
done:
	return (err);
}

static void
mlx5e_destroy_sq(struct mlx5e_sq *sq)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;

	/* destroy all sysctl nodes */
	sysctl_ctx_free(&sq->stats.ctx);

	mlx5e_free_sq_db(sq);
	mlx5_wq_destroy(&sq->wq_ctrl);
	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
	taskqueue_drain(sq->sq_tq, &sq->sq_task);
	taskqueue_free(sq->sq_tq);
	buf_ring_free(sq->br, M_MLX5EN);
}

static int
mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *sqc;
	void *wq;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
	    sizeof(u64) * sq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
	wq = MLX5_ADDR_OF(sqc, sqc, wq);

	memcpy(sqc, param->sqc, sizeof(param->sqc));

	MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]);
	MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn);
	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
	MLX5_SET(sqc, sqc, flush_in_error_en, 1);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
	MLX5_SET(wq, wq, uar_page, sq->uar.index);
	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);

	mlx5_fill_page_array(&sq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));

	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);

	kvfree(in);

	return (err);
}

static int
mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	void *in;
	void *sqc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);

	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
	MLX5_SET(sqc, sqc, state, next_state);

	err = mlx5_core_modify_sq(mdev, in, inlen);

	kvfree(in);

	return (err);
}

static void
mlx5e_disable_sq(struct mlx5e_sq *sq)
{
	struct mlx5e_channel *c = sq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_sq(mdev, sq->sqn);
}

static int
mlx5e_open_sq(struct mlx5e_channel *c,
    int tc,
    struct mlx5e_sq_param *param,
    struct mlx5e_sq *sq)
{
	int err;

	err = mlx5e_create_sq(c, tc, param, sq);
	if (err)
		return (err);

	err = mlx5e_enable_sq(sq, param);
	if (err)
		goto err_destroy_sq;

	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
	if (err)
		goto err_disable_sq;

	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);

	return (0);

err_disable_sq:
	mlx5e_disable_sq(sq);
err_destroy_sq:
	mlx5e_destroy_sq(sq);

	return (err);
}

static void
mlx5e_close_sq(struct mlx5e_sq *sq)
{

	/* ensure hw is notified of all pending wqes */
	if (mlx5e_sq_has_room_for(sq, 1))
		mlx5e_send_nop(sq, 1, true);

	mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
}

static void
mlx5e_close_sq_wait(struct mlx5e_sq *sq)
{
	/* wait till SQ is empty */
	while (sq->cc != sq->pc) {
		msleep(4);
		sq->cq.mcq.comp(&sq->cq.mcq);
	}

	mlx5e_disable_sq(sq);
	mlx5e_destroy_sq(sq);
}

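/*
 * Allocate a completion queue and preset the ownership byte of every
 * CQE to 0xf1, an invalid value, which keeps the entries looking
 * hardware-owned until the device actually writes them.
 */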
static int
mlx5e_create_cq(struct mlx5e_channel *c,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp)
{
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	int eqn_not_used;
	int irqn;
	int err;
	u32 i;

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->eq_ix = c->ix;

	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
	    &cq->wq_ctrl);
	if (err)
		return (err);

	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);

	mcq->cqe_sz = 64;
	mcq->set_ci_db = cq->wq_ctrl.db.db;
	mcq->arm_db = cq->wq_ctrl.db.db + 1;
	*mcq->set_ci_db = 0;
	*mcq->arm_db = 0;
	mcq->vector = param->eq_ix;
	mcq->comp = comp;
	mcq->event = mlx5e_cq_error_event;
	mcq->irqn = irqn;
	mcq->uar = &priv->cq_uar;

	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);

		cqe->op_own = 0xf1;
	}

	cq->channel = c;

	return (0);
}

static void
mlx5e_destroy_cq(struct mlx5e_cq *cq)
{
	mlx5_wq_destroy(&cq->wq_ctrl);
}

static int
mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
    u8 moderation_mode)
{
	struct mlx5e_channel *c = cq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;
	struct mlx5_core_cq *mcq = &cq->mcq;
	void *in;
	void *cqc;
	int inlen;
	int irqn_not_used;
	int eqn;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
	    sizeof(u64) * cq->wq_ctrl.buf.npages;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);

	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);

	memcpy(cqc, param->cqc, sizeof(param->cqc));

	mlx5_fill_page_array(&cq->wq_ctrl.buf,
	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));

	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);

	MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
	    PAGE_SHIFT);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

	err = mlx5_core_create_cq(mdev, mcq, in, inlen);

	kvfree(in);

	if (err)
		return (err);

	mlx5e_cq_arm(cq);

	return (0);
}

static void
mlx5e_disable_cq(struct mlx5e_cq *cq)
{
	struct mlx5e_channel *c = cq->channel;
	struct mlx5e_priv *priv = c->priv;
	struct mlx5_core_dev *mdev = priv->mdev;

	mlx5_core_destroy_cq(mdev, &cq->mcq);
}

static int
mlx5e_open_cq(struct mlx5e_channel *c,
    struct mlx5e_cq_param *param,
    struct mlx5e_cq *cq,
    mlx5e_cq_comp_t *comp,
    u8 moderation_mode)
{
	int err;

	err = mlx5e_create_cq(c, param, cq, comp);
	if (err)
		return (err);

	err = mlx5e_enable_cq(cq, param, moderation_mode);
	if (err)
		goto err_destroy_cq;

	return (0);

err_destroy_cq:
	mlx5e_destroy_cq(cq);

	return (err);
}

static void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}

static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	u8 tx_moderation_mode;
	int err;
	int tc;

	switch (c->priv->params.tx_cq_moderation_mode) {
	case 0:
		tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	default:
		if (MLX5_CAP_GEN(c->priv->mdev, cq_period_start_from_cqe))
			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
		else
			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	}
	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, tx_moderation_mode);
		if (err)
			goto err_close_tx_cqs;
	}
	return (0);

err_close_tx_cqs:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_cq(&c->sq[tc].cq);

	return (err);
}

static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_cq(&c->sq[tc].cq);
}

static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (err)
			goto err_close_sqs;
	}

	return (0);

err_close_sqs:
	for (tc--; tc >= 0; tc--) {
		mlx5e_close_sq(&c->sq[tc]);
		mlx5e_close_sq_wait(&c->sq[tc]);
	}

	return (err);
}

static void
mlx5e_close_sqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq(&c->sq[tc]);
}

static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq_wait(&c->sq[tc]);
}

static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	int tc;

	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_init(&c->sq[tc].lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
		mtx_init(&c->sq[tc].comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
		    MTX_DEF);
	}
}

static void
mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
{
	int tc;

	mtx_destroy(&c->rq.mtx);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_destroy(&c->sq[tc].lock);
		mtx_destroy(&c->sq[tc].comp_lock);
	}
}

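/*
 * Bring up a single channel: mutexes, transmit completion queues,
 * the receive completion queue, the send queues, and finally the
 * receive queue. On failure everything is torn down again in
 * reverse order.
 */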
static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct mlx5e_channel_param *cparam,
    struct mlx5e_channel *volatile *cp)
{
	struct mlx5e_channel *c;
	u8 rx_moderation_mode;
	int err;

	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
	if (c == NULL)
		return (-ENOMEM);

	c->priv = priv;
	c->ix = ix;
	c->cpu = 0;
	c->pdev = &priv->mdev->pdev->dev;
	c->ifp = priv->ifp;
	c->mkey_be = cpu_to_be32(priv->mr.key);
	c->num_tc = priv->num_tc;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	err = mlx5e_open_tx_cqs(c, cparam);
	if (err)
		goto err_free;

	switch (priv->params.rx_cq_moderation_mode) {
	case 0:
		rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	default:
		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
		else
			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
		break;
	}

	/* open receive completion queue */
	err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, rx_moderation_mode);
	if (err)
		goto err_close_tx_cqs;

	err = mlx5e_open_sqs(c, cparam);
	if (err)
		goto err_close_rx_cq;

	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (err)
		goto err_close_sqs;

	/* store channel pointer */
	*cp = c;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

err_close_sqs:
	mlx5e_close_sqs(c);
	mlx5e_close_sqs_wait(c);

err_close_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
	mlx5e_close_tx_cqs(c);

err_free:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
	return (err);
}

static void
mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	mlx5e_close_rq(&c->rq);
	mlx5e_close_sqs(c);
}

static void
mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}

static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
    struct mlx5e_rq_param *param)
{
	void *rqc = param->rqc;
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);

	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}

static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	/*
	 * TODO: The sysctl controlling CQE zipping is a boolean value
	 * for now, which means we only support the CSUM mini CQE
	 * format; once HASH is implemented we'll need to address that.
	 */
	if (priv->params.cqe_zipping_en) {
		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
	}

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);

	mlx5e_build_common_cq_param(priv, param);
}

static void
mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);

	mlx5e_build_common_cq_param(priv, param);
}

static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
    struct mlx5e_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

	mlx5e_build_rq_param(priv, &cparam->rq);
	mlx5e_build_sq_param(priv, &cparam->sq);
	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}

static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
	struct mlx5e_channel_param cparam;
	void *ptr;
	int err;
	int i;
	int j;

	priv->channel = malloc(priv->params.num_channels *
	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
	if (priv->channel == NULL)
		return (-ENOMEM);

	mlx5e_build_channel_param(priv, &cparam);
	for (i = 0; i < priv->params.num_channels; i++) {
		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
		if (err)
			goto err_close_channels;
	}

	for (j = 0; j < priv->params.num_channels; j++) {
		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
		if (err)
			goto err_close_channels;
	}

	return (0);

err_close_channels:
	for (i--; i >= 0; i--) {
		mlx5e_close_channel(&priv->channel[i]);
		mlx5e_close_channel_wait(&priv->channel[i]);
	}

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);

	return (err);
}

static void
mlx5e_close_channels(struct mlx5e_priv *priv)
{
	void *ptr;
	int i;

	if (priv->channel == NULL)
		return;

	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel(&priv->channel[i]);
	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel_wait(&priv->channel[i]);

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);
}

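/*
 * Push the current interrupt moderation settings (microsecond delay
 * and packet count) down to the completion queues of the open
 * channels.
 */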
static int
mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
{
	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
	    priv->params.tx_cq_moderation_usec,
	    priv->params.tx_cq_moderation_pkts));
}

static int
mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
{
	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
	    priv->params.rx_cq_moderation_usec,
	    priv->params.rx_cq_moderation_pkts));
}

static int
mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
{
	int err;
	int i;

	if (c == NULL)
		return (EINVAL);

	err = mlx5e_refresh_rq_params(priv, &c->rq);
	if (err)
		goto done;

	for (i = 0; i != c->num_tc; i++) {
		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
		if (err)
			goto done;
	}
done:
	return (err);
}

int
mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
{
	int i;

	if (priv->channel == NULL)
		return (EINVAL);

	for (i = 0; i < priv->params.num_channels; i++) {
		int err;

		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
		if (err)
			return (err);
	}
	return (0);
}

static int
mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	MLX5_SET(tisc, tisc, prio, tc);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
}

static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}

static int
mlx5e_open_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int err;
	int tc;

	for (tc = 0; tc < num_tc; tc++) {
		err = mlx5e_open_tis(priv, tc);
		if (err)
			goto err_close_tises;
	}

	return (0);

err_close_tises:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_tis(priv, tc);

	return (err);
}

static void
mlx5e_close_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int tc;

	for (tc = 0; tc < num_tc; tc++)
		mlx5e_close_tis(priv, tc);
}

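/*
 * Create the receive queue table (RQT) used for RSS indirection,
 * with 2^rx_hash_log_tbl_sz entries each pointing at one channel's
 * RQ. With the kernel RSS option the layout follows the stack's own
 * indirection table; otherwise the channels are filled in
 * round-robin.
 */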
static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
	void *rqtc;
	int inlen;
	int err;
	int sz;
	int i;

	sz = 1 << priv->params.rx_hash_log_tbl_sz;

	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	for (i = 0; i < sz; i++) {
		int ix;
#ifdef RSS
		ix = rss_get_indirection_to_bucket(i);
#else
		ix = i;
#endif
		/* ensure we don't overflow */
		ix %= priv->params.num_channels;
		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
	}

	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);

	memset(out, 0, sizeof(out));
	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
	if (!err)
		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);

	kvfree(in);

	return (err);
}

static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];

	memset(in, 0, sizeof(in));

	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);

	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
	    sizeof(out));
}

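/*
 * Fill in the TIR context for one traffic type (TT). MLX5E_TT_ANY
 * dispatches directly to the first channel's RQ; all other traffic
 * types spread over the RQT using a Toeplitz hash, with the hashed
 * fields selected per traffic type below. Without the kernel RSS
 * option a fixed, symmetric hash key is used.
 */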
1885 static void
1886 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
1887 {
1888 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1889 	__be32 *hkey;
1890 
1891 	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
1892 
1893 #define	ROUGH_MAX_L2_L3_HDR_SZ 256
1894 
1895 #define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1896 			  MLX5_HASH_FIELD_SEL_DST_IP)
1897 
1898 #define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1899 			  MLX5_HASH_FIELD_SEL_DST_IP   |\
1900 			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
1901 			  MLX5_HASH_FIELD_SEL_L4_DPORT)
1902 
1903 #define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
1904 				 MLX5_HASH_FIELD_SEL_DST_IP   |\
1905 				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
1906 
1907 	if (priv->params.hw_lro_en) {
1908 		MLX5_SET(tirc, tirc, lro_enable_mask,
1909 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
1910 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
1911 		MLX5_SET(tirc, tirc, lro_max_msg_sz,
1912 		    (priv->params.lro_wqe_sz -
1913 		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
1914 		/* TODO: add the option to choose timer value dynamically */
1915 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
1916 		    MLX5_CAP_ETH(priv->mdev,
1917 		    lro_timer_supported_periods[2]));
1918 	}
1919 
1920 	/* setup parameters for hashing TIR type, if any */
1921 	switch (tt) {
1922 	case MLX5E_TT_ANY:
1923 		MLX5_SET(tirc, tirc, disp_type,
1924 		    MLX5_TIRC_DISP_TYPE_DIRECT);
1925 		MLX5_SET(tirc, tirc, inline_rqn,
1926 		    priv->channel[0]->rq.rqn);
1927 		break;
1928 	default:
1929 		MLX5_SET(tirc, tirc, disp_type,
1930 		    MLX5_TIRC_DISP_TYPE_INDIRECT);
1931 		MLX5_SET(tirc, tirc, indirect_table,
1932 		    priv->rqtn);
1933 		MLX5_SET(tirc, tirc, rx_hash_fn,
1934 		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
1935 		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1936 #ifdef RSS
1937 		/*
1938 		 * The FreeBSD RSS implementation does currently not
1939 		 * support symmetric Toeplitz hashes:
1940 		 */
1941 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
1942 		rss_getkey((uint8_t *)hkey);
1943 #else
1944 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1945 		hkey[0] = cpu_to_be32(0xD181C62C);
1946 		hkey[1] = cpu_to_be32(0xF7F4DB5B);
1947 		hkey[2] = cpu_to_be32(0x1983A2FC);
1948 		hkey[3] = cpu_to_be32(0x943E1ADB);
1949 		hkey[4] = cpu_to_be32(0xD9389E6B);
1950 		hkey[5] = cpu_to_be32(0xD1039C2C);
1951 		hkey[6] = cpu_to_be32(0xA74499AD);
1952 		hkey[7] = cpu_to_be32(0x593D56D9);
1953 		hkey[8] = cpu_to_be32(0xF3253C06);
1954 		hkey[9] = cpu_to_be32(0x2ADC1FFC);
1955 #endif
1956 		break;
1957 	}
1958 
1959 	switch (tt) {
1960 	case MLX5E_TT_IPV4_TCP:
1961 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1962 		    MLX5_L3_PROT_TYPE_IPV4);
1963 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1964 		    MLX5_L4_PROT_TYPE_TCP);
1965 #ifdef RSS
1966 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
1967 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
1968 			    MLX5_HASH_IP);
1969 		} else
1970 #endif
1971 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
1972 		    MLX5_HASH_ALL);
1973 		break;
1974 
1975 	case MLX5E_TT_IPV6_TCP:
1976 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1977 		    MLX5_L3_PROT_TYPE_IPV6);
1978 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1979 		    MLX5_L4_PROT_TYPE_TCP);
1980 #ifdef RSS
1981 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
1982 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
1983 			    MLX5_HASH_IP);
1984 		} else
1985 #endif
1986 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
1987 		    MLX5_HASH_ALL);
1988 		break;
1989 
1990 	case MLX5E_TT_IPV4_UDP:
1991 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1992 		    MLX5_L3_PROT_TYPE_IPV4);
1993 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1994 		    MLX5_L4_PROT_TYPE_UDP);
1995 #ifdef RSS
1996 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
1997 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
1998 			    MLX5_HASH_IP);
1999 		} else
2000 #endif
2001 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2002 		    MLX5_HASH_ALL);
2003 		break;
2004 
2005 	case MLX5E_TT_IPV6_UDP:
2006 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2007 		    MLX5_L3_PROT_TYPE_IPV6);
2008 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2009 		    MLX5_L4_PROT_TYPE_UDP);
2010 #ifdef RSS
2011 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2012 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2013 			    MLX5_HASH_IP);
2014 		} else
2015 #endif
2016 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2017 		    MLX5_HASH_ALL);
2018 		break;
2019 
2020 	case MLX5E_TT_IPV4_IPSEC_AH:
2021 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2022 		    MLX5_L3_PROT_TYPE_IPV4);
2023 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2024 		    MLX5_HASH_IP_IPSEC_SPI);
2025 		break;
2026 
2027 	case MLX5E_TT_IPV6_IPSEC_AH:
2028 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2029 		    MLX5_L3_PROT_TYPE_IPV6);
2030 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2031 		    MLX5_HASH_IP_IPSEC_SPI);
2032 		break;
2033 
2034 	case MLX5E_TT_IPV4_IPSEC_ESP:
2035 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2036 		    MLX5_L3_PROT_TYPE_IPV4);
2037 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2038 		    MLX5_HASH_IP_IPSEC_SPI);
2039 		break;
2040 
2041 	case MLX5E_TT_IPV6_IPSEC_ESP:
2042 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2043 		    MLX5_L3_PROT_TYPE_IPV6);
2044 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2045 		    MLX5_HASH_IP_IPSEC_SPI);
2046 		break;
2047 
2048 	case MLX5E_TT_IPV4:
2049 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2050 		    MLX5_L3_PROT_TYPE_IPV4);
2051 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2052 		    MLX5_HASH_IP);
2053 		break;
2054 
2055 	case MLX5E_TT_IPV6:
2056 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2057 		    MLX5_L3_PROT_TYPE_IPV6);
2058 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2059 		    MLX5_HASH_IP);
2060 		break;
2061 
2062 	default:
2063 		break;
2064 	}
2065 }
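
/*
 * Illustrative sketch, compiled out: a host-side Toeplitz hash over the
 * same 40-byte key programmed above can predict the HW hash value for a
 * given flow. This assumes the standard Microsoft RSS bit-sliding
 * definition; the symmetric variant selected via "rx_hash_symmetric" is
 * not modeled. "mlx5e_toeplitz_ref" is a hypothetical helper, not a
 * driver symbol, and requires datalen <= keylen - 4 (36 bytes here).
 */
#if 0
static uint32_t
mlx5e_toeplitz_ref(const uint8_t *key, const uint8_t *data, int datalen)
{
	uint32_t hash = 0;
	uint32_t window = ((uint32_t)key[0] << 24) |
	    ((uint32_t)key[1] << 16) | ((uint32_t)key[2] << 8) |
	    (uint32_t)key[3];
	int i, bit;

	/*
	 * For each input bit, MSB first: XOR in the current 32-bit key
	 * window when the bit is set, then slide the window one key bit.
	 */
	for (i = 0; i < datalen; i++) {
		for (bit = 7; bit >= 0; bit--) {
			if (data[i] & (1 << bit))
				hash ^= window;
			window <<= 1;
			if (key[i + 4] & (1 << bit))
				window |= 1;
		}
	}
	return (hash);
}
#endif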
2066 
2067 static int
2068 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2069 {
2070 	struct mlx5_core_dev *mdev = priv->mdev;
2071 	u32 *in;
2072 	void *tirc;
2073 	int inlen;
2074 	int err;
2075 
2076 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2077 	in = mlx5_vzalloc(inlen);
2078 	if (in == NULL)
2079 		return (-ENOMEM);
2080 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2081 
2082 	mlx5e_build_tir_ctx(priv, tirc, tt);
2083 
2084 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2085 
2086 	kvfree(in);
2087 
2088 	return (err);
2089 }
2090 
2091 static void
2092 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2093 {
2094 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2095 }
2096 
2097 static int
2098 mlx5e_open_tirs(struct mlx5e_priv *priv)
2099 {
2100 	int err;
2101 	int i;
2102 
2103 	for (i = 0; i < MLX5E_NUM_TT; i++) {
2104 		err = mlx5e_open_tir(priv, i);
2105 		if (err)
2106 			goto err_close_tirs;
2107 	}
2108 
2109 	return (0);
2110 
2111 err_close_tirs:
2112 	for (i--; i >= 0; i--)
2113 		mlx5e_close_tir(priv, i);
2114 
2115 	return (err);
2116 }
2117 
2118 static void
2119 mlx5e_close_tirs(struct mlx5e_priv *priv)
2120 {
2121 	int i;
2122 
2123 	for (i = 0; i < MLX5E_NUM_TT; i++)
2124 		mlx5e_close_tir(priv, i);
2125 }
2126 
2127 /*
2128  * The SW MTU does not include headers, while the
2129  * HW MTU includes all headers and checksums.
2130  */
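/*
 * For example (a sketch; the exact macro definitions live in en.h and
 * are assumed here): if MLX5E_SW2HW_MTU(mtu) adds the Ethernet header,
 * one VLAN tag and the FCS, a SW MTU of 1500 becomes a HW MTU of
 * 1500 + 14 + 4 + 4 = 1522, and MLX5E_HW2SW_MTU() subtracts the same.
 */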
2131 static int
2132 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2133 {
2134 	struct mlx5e_priv *priv = ifp->if_softc;
2135 	struct mlx5_core_dev *mdev = priv->mdev;
2136 	int hw_mtu;
2137 	int err;
2138 
2139 
2140 	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
2141 	if (err) {
2142 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2143 		    __func__, sw_mtu, err);
2144 		return (err);
2145 	}
2146 	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2147 	if (!err) {
2148 		ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);
2149 
2150 		if (ifp->if_mtu != sw_mtu) {
2151 			if_printf(ifp, "Port MTU %d differs from requested "
2152 			    "MTU %d\n", (int)ifp->if_mtu, sw_mtu);
2153 		}
2154 	} else {
2155 		if_printf(ifp, "Querying port MTU after setting the new "
2156 		    "MTU value failed\n");
2157 		ifp->if_mtu = sw_mtu;
2158 	}
2159 	return (0);
2160 }
2161 
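/*
 * Bring-up order below (torn down in reverse by the error unwind and by
 * mlx5e_close_locked()): TISes -> vport queue counter -> channels
 * (RQs/SQs/CQs) -> RQT -> TIRs -> flow table -> VLAN rules.
 */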
2162 int
2163 mlx5e_open_locked(struct ifnet *ifp)
2164 {
2165 	struct mlx5e_priv *priv = ifp->if_softc;
2166 	int err;
2167 
2168 	/* check if already opened */
2169 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2170 		return (0);
2171 
2172 #ifdef RSS
2173 	if (rss_getnumbuckets() > priv->params.num_channels) {
2174 		if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
2175 		    "channels (%u) available\n", rss_getnumbuckets(),
2176 		    priv->params.num_channels);
2177 	}
2178 #endif
2179 	err = mlx5e_open_tises(priv);
2180 	if (err) {
2181 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2182 		    __func__, err);
2183 		return (err);
2184 	}
2185 	err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
2186 	if (err) {
2187 		if_printf(priv->ifp,
2188 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2189 		    __func__, err);
2190 		goto err_close_tises;
2191 	}
2192 	err = mlx5e_open_channels(priv);
2193 	if (err) {
2194 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2195 		    __func__, err);
2196 		goto err_dealloc_q_counter;
2197 	}
2198 	err = mlx5e_open_rqt(priv);
2199 	if (err) {
2200 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2201 		    __func__, err);
2202 		goto err_close_channels;
2203 	}
2204 	err = mlx5e_open_tirs(priv);
2205 	if (err) {
2206 		if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2207 		    __func__, err);
2208 		goto err_close_rqt;
2209 	}
2210 	err = mlx5e_open_flow_table(priv);
2211 	if (err) {
2212 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2213 		    __func__, err);
2214 		goto err_close_tirs;
2215 	}
2216 	err = mlx5e_add_all_vlan_rules(priv);
2217 	if (err) {
2218 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2219 		    __func__, err);
2220 		goto err_close_flow_table;
2221 	}
2222 	set_bit(MLX5E_STATE_OPENED, &priv->state);
2223 
2224 	mlx5e_update_carrier(priv);
2225 	mlx5e_set_rx_mode_core(priv);
2226 
2227 	return (0);
2228 
2229 err_close_flow_table:
2230 	mlx5e_close_flow_table(priv);
2231 
2232 err_close_tirs:
2233 	mlx5e_close_tirs(priv);
2234 
2235 err_close_rqt:
2236 	mlx5e_close_rqt(priv);
2237 
2238 err_close_channels:
2239 	mlx5e_close_channels(priv);
2240 
2241 err_dealloc_q_counter:
2242 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2243 
2244 err_close_tises:
2245 	mlx5e_close_tises(priv);
2246 
2247 	return (err);
2248 }
2249 
2250 static void
2251 mlx5e_open(void *arg)
2252 {
2253 	struct mlx5e_priv *priv = arg;
2254 
2255 	PRIV_LOCK(priv);
2256 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2257 		if_printf(priv->ifp,
2258 		    "%s: Setting port status to up failed\n",
2259 		    __func__);
2260 
2261 	mlx5e_open_locked(priv->ifp);
2262 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2263 	PRIV_UNLOCK(priv);
2264 }
2265 
2266 int
2267 mlx5e_close_locked(struct ifnet *ifp)
2268 {
2269 	struct mlx5e_priv *priv = ifp->if_softc;
2270 
2271 	/* check if already closed */
2272 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2273 		return (0);
2274 
2275 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2276 
2277 	mlx5e_set_rx_mode_core(priv);
2278 	mlx5e_del_all_vlan_rules(priv);
2279 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2280 	mlx5e_close_flow_table(priv);
2281 	mlx5e_close_tirs(priv);
2282 	mlx5e_close_rqt(priv);
2283 	mlx5e_close_channels(priv);
2284 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2285 	mlx5e_close_tises(priv);
2286 
2287 	return (0);
2288 }
2289 
2290 #if (__FreeBSD_version >= 1100000)
2291 static uint64_t
2292 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2293 {
2294 	struct mlx5e_priv *priv = ifp->if_softc;
2295 	u64 retval;
2296 
2297 	/* PRIV_LOCK(priv); XXX not allowed */
2298 	switch (cnt) {
2299 	case IFCOUNTER_IPACKETS:
2300 		retval = priv->stats.vport.rx_packets;
2301 		break;
2302 	case IFCOUNTER_IERRORS:
2303 		retval = priv->stats.vport.rx_error_packets;
2304 		break;
2305 	case IFCOUNTER_IQDROPS:
2306 		retval = priv->stats.vport.rx_out_of_buffer;
2307 		break;
2308 	case IFCOUNTER_OPACKETS:
2309 		retval = priv->stats.vport.tx_packets;
2310 		break;
2311 	case IFCOUNTER_OERRORS:
2312 		retval = priv->stats.vport.tx_error_packets;
2313 		break;
2314 	case IFCOUNTER_IBYTES:
2315 		retval = priv->stats.vport.rx_bytes;
2316 		break;
2317 	case IFCOUNTER_OBYTES:
2318 		retval = priv->stats.vport.tx_bytes;
2319 		break;
2320 	case IFCOUNTER_IMCASTS:
2321 		retval = priv->stats.vport.rx_multicast_packets;
2322 		break;
2323 	case IFCOUNTER_OMCASTS:
2324 		retval = priv->stats.vport.tx_multicast_packets;
2325 		break;
2326 	case IFCOUNTER_OQDROPS:
2327 		retval = priv->stats.vport.tx_queue_dropped;
2328 		break;
2329 	default:
2330 		retval = if_get_counter_default(ifp, cnt);
2331 		break;
2332 	}
2333 	/* PRIV_UNLOCK(priv); XXX not allowed */
2334 	return (retval);
2335 }
2336 #endif
2337 
2338 static void
2339 mlx5e_set_rx_mode(struct ifnet *ifp)
2340 {
2341 	struct mlx5e_priv *priv = ifp->if_softc;
2342 
2343 	schedule_work(&priv->set_rx_mode_work);
2344 }
2345 
2346 static int
2347 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2348 {
2349 	struct mlx5e_priv *priv;
2350 	struct ifreq *ifr;
2351 	struct ifi2creq i2c;
2352 	int error = 0;
2353 	int mask = 0;
2354 	int size_read = 0;
2355 	int module_num;
2356 	int max_mtu;
2357 	uint8_t read_addr;
2358 
2359 	priv = ifp->if_softc;
2360 
2361 	/* check if detaching */
2362 	if (priv == NULL || priv->gone != 0)
2363 		return (ENXIO);
2364 
2365 	switch (command) {
2366 	case SIOCSIFMTU:
2367 		ifr = (struct ifreq *)data;
2368 
2369 		PRIV_LOCK(priv);
2370 		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2371 
2372 		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2373 		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2374 			int was_opened;
2375 
2376 			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2377 			if (was_opened)
2378 				mlx5e_close_locked(ifp);
2379 
2380 			/* set new MTU */
2381 			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2382 
2383 			if (was_opened)
2384 				mlx5e_open_locked(ifp);
2385 		} else {
2386 			error = EINVAL;
2387 			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2388 			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2389 		}
2390 		PRIV_UNLOCK(priv);
2391 		break;
2392 	case SIOCSIFFLAGS:
2393 		if ((ifp->if_flags & IFF_UP) &&
2394 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2395 			mlx5e_set_rx_mode(ifp);
2396 			break;
2397 		}
2398 		PRIV_LOCK(priv);
2399 		if (ifp->if_flags & IFF_UP) {
2400 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2401 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2402 					mlx5e_open_locked(ifp);
2403 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2404 				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2405 			}
2406 		} else {
2407 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2408 				mlx5_set_port_status(priv->mdev,
2409 				    MLX5_PORT_DOWN);
2410 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2411 					mlx5e_close_locked(ifp);
2412 				mlx5e_update_carrier(priv);
2413 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2414 			}
2415 		}
2416 		PRIV_UNLOCK(priv);
2417 		break;
2418 	case SIOCADDMULTI:
2419 	case SIOCDELMULTI:
2420 		mlx5e_set_rx_mode(ifp);
2421 		break;
2422 	case SIOCSIFMEDIA:
2423 	case SIOCGIFMEDIA:
2424 	case SIOCGIFXMEDIA:
2425 		ifr = (struct ifreq *)data;
2426 		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2427 		break;
2428 	case SIOCSIFCAP:
2429 		ifr = (struct ifreq *)data;
2430 		PRIV_LOCK(priv);
2431 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2432 
2433 		if (mask & IFCAP_TXCSUM) {
2434 			ifp->if_capenable ^= IFCAP_TXCSUM;
2435 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2436 
2437 			if (IFCAP_TSO4 & ifp->if_capenable &&
2438 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2439 				ifp->if_capenable &= ~IFCAP_TSO4;
2440 				ifp->if_hwassist &= ~CSUM_IP_TSO;
2441 				if_printf(ifp,
2442 				    "tso4 disabled due to -txcsum.\n");
2443 			}
2444 		}
2445 		if (mask & IFCAP_TXCSUM_IPV6) {
2446 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2447 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2448 
2449 			if (IFCAP_TSO6 & ifp->if_capenable &&
2450 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2451 				ifp->if_capenable &= ~IFCAP_TSO6;
2452 				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2453 				if_printf(ifp,
2454 				    "tso6 disabled due to -txcsum6.\n");
2455 			}
2456 		}
2457 		if (mask & IFCAP_RXCSUM)
2458 			ifp->if_capenable ^= IFCAP_RXCSUM;
2459 		if (mask & IFCAP_RXCSUM_IPV6)
2460 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2461 		if (mask & IFCAP_TSO4) {
2462 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2463 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2464 				if_printf(ifp, "enable txcsum first.\n");
2465 				error = EAGAIN;
2466 				goto out;
2467 			}
2468 			ifp->if_capenable ^= IFCAP_TSO4;
2469 			ifp->if_hwassist ^= CSUM_IP_TSO;
2470 		}
2471 		if (mask & IFCAP_TSO6) {
2472 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2473 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2474 				if_printf(ifp, "enable txcsum6 first.\n");
2475 				error = EAGAIN;
2476 				goto out;
2477 			}
2478 			ifp->if_capenable ^= IFCAP_TSO6;
2479 			ifp->if_hwassist ^= CSUM_IP6_TSO;
2480 		}
2481 		if (mask & IFCAP_VLAN_HWFILTER) {
2482 			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2483 				mlx5e_disable_vlan_filter(priv);
2484 			else
2485 				mlx5e_enable_vlan_filter(priv);
2486 
2487 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2488 		}
2489 		if (mask & IFCAP_VLAN_HWTAGGING)
2490 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2491 		if (mask & IFCAP_WOL_MAGIC)
2492 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2493 
2494 		VLAN_CAPABILITIES(ifp);
2495 		/* Turning off LRO also turns off HW LRO, if it is on */
2496 		if (mask & IFCAP_LRO) {
2497 			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2498 			bool need_restart = false;
2499 
2500 			ifp->if_capenable ^= IFCAP_LRO;
2501 			if (!(ifp->if_capenable & IFCAP_LRO)) {
2502 				if (priv->params.hw_lro_en) {
2503 					priv->params.hw_lro_en = false;
2504 					need_restart = true;
2505 					/* XXX mirror into params_ethtool; not sure this is the correct way */
2506 					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2507 				}
2508 			}
2509 			if (was_opened && need_restart) {
2510 				mlx5e_close_locked(ifp);
2511 				mlx5e_open_locked(ifp);
2512 			}
2513 		}
2514 out:
2515 		PRIV_UNLOCK(priv);
2516 		break;
2517 
2518 	case SIOCGI2C:
2519 		ifr = (struct ifreq *)data;
2520 
2521 		/*
2522 		 * Copy from the user-space address ifr_data to the
2523 		 * kernel-space address i2c
2524 		 */
2525 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2526 		if (error)
2527 			break;
2528 
2529 		if (i2c.len > sizeof(i2c.data)) {
2530 			error = EINVAL;
2531 			break;
2532 		}
2533 
2534 		PRIV_LOCK(priv);
2535 		/* Get module_num which is required for the query_eeprom */
2536 		error = mlx5_query_module_num(priv->mdev, &module_num);
2537 		if (error) {
2538 			if_printf(ifp, "Query module num failed, eeprom "
2539 			    "reading is not supported\n");
2540 			goto err_i2c;
2541 		}
2542 
2543 		/*
2544 		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
2545 		 * The internal conversion is as follows:
2546 		 */
2547 		if (i2c.dev_addr == 0xA0)
2548 			read_addr = MLX5E_I2C_ADDR_LOW;
2549 		else if (i2c.dev_addr == 0xA2)
2550 			read_addr = MLX5E_I2C_ADDR_HIGH;
2551 		else {
2552 			if_printf(ifp, "Query eeprom failed, "
2553 			    "invalid address: 0x%X\n", i2c.dev_addr);
2554 			error = EINVAL;
2555 			goto err_i2c;
2556 		}
2557 		error = mlx5_query_eeprom(priv->mdev,
2558 		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2559 		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2560 		    (uint32_t *)i2c.data, &size_read);
2561 		if (error) {
2562 			if_printf(ifp, "Query eeprom failed, eeprom "
2563 			    "reading is not supported\n");
2564 			goto err_i2c;
2565 		}
2566 
2567 		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2568 			error = mlx5_query_eeprom(priv->mdev,
2569 			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2570 			    (uint32_t)(i2c.offset + size_read),
2571 			    (uint32_t)(i2c.len - size_read), module_num,
2572 			    (uint32_t *)(i2c.data + size_read), &size_read);
2573 		}
2574 		if (error) {
2575 			if_printf(ifp, "Query eeprom failed, eeprom "
2576 			    "reading is not supported\n");
2577 			goto err_i2c;
2578 		}
2579 
2580 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2581 err_i2c:
2582 		PRIV_UNLOCK(priv);
2583 		break;
2584 
2585 	default:
2586 		error = ether_ioctl(ifp, command, data);
2587 		break;
2588 	}
2589 	return (error);
2590 }
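
/*
 * Minimal userland sketch of driving the SIOCGI2C path above to read the
 * first bytes of a module EEPROM at address 0xA0. Assumptions: an
 * interface named "mce0", struct ifi2creq as declared in <net/if.h>, and
 * the usual userland headers (<sys/ioctl.h>, <sys/socket.h>, <string.h>,
 * <unistd.h>); error handling is omitted.
 */
#if 0
static void
example_read_module_eeprom(void)
{
	struct ifreq ifr;
	struct ifi2creq i2c;
	int s;

	s = socket(AF_INET, SOCK_DGRAM, 0);
	memset(&ifr, 0, sizeof(ifr));
	memset(&i2c, 0, sizeof(i2c));
	strlcpy(ifr.ifr_name, "mce0", sizeof(ifr.ifr_name));
	i2c.dev_addr = 0xA0;		/* mapped to MLX5E_I2C_ADDR_LOW */
	i2c.offset = 0;
	i2c.len = sizeof(i2c.data);	/* at most 8 bytes per request */
	ifr.ifr_data = (caddr_t)&i2c;
	if (ioctl(s, SIOCGI2C, &ifr) == 0) {
		/* i2c.data[] now holds the EEPROM bytes */
	}
	close(s);
}
#endif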
2591 
2592 static int
2593 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2594 {
2595 	/*
2596 	 * TODO: uncomment once FW really sets all these bits:
2597 	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2598 	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2599 	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2600 	 *	return (-ENOTSUPP);
2601 	 */
2602 
2603 	/* TODO: add more must-have features */
2604 
2605 	return (0);
2606 }
2607 
2608 static void
2609 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2610     struct mlx5e_priv *priv,
2611     int num_comp_vectors)
2612 {
2613 	/*
2614 	 * TODO: Consider link speed for setting "log_sq_size",
2615 	 * "log_rq_size" and "cq_moderation_xxx":
2616 	 */
2617 	priv->params.log_sq_size =
2618 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2619 	priv->params.log_rq_size =
2620 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2621 	priv->params.rx_cq_moderation_usec =
2622 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2623 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2624 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2625 	priv->params.rx_cq_moderation_mode =
2626 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2627 	priv->params.rx_cq_moderation_pkts =
2628 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2629 	priv->params.tx_cq_moderation_usec =
2630 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2631 	priv->params.tx_cq_moderation_pkts =
2632 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2633 	priv->params.min_rx_wqes =
2634 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
2635 	priv->params.rx_hash_log_tbl_sz =
2636 	    (order_base_2(num_comp_vectors) >
2637 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2638 	    order_base_2(num_comp_vectors) :
2639 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2640 	priv->params.num_tc = 1;
2641 	priv->params.default_vlan_prio = 0;
2642 	priv->counter_set_id = -1;
2643 
2644 	/*
2645 	 * HW LRO currently defaults to off. Once that changes, the HW
2646 	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" should be honored.
2647 	 */
2648 	priv->params.hw_lro_en = false;
2649 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2650 
2651 	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
2652 
2653 	priv->mdev = mdev;
2654 	priv->params.num_channels = num_comp_vectors;
2655 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
2656 	priv->queue_mapping_channel_mask =
2657 	    roundup_pow_of_two(num_comp_vectors) - 1;
2658 	priv->num_tc = priv->params.num_tc;
2659 	priv->default_vlan_prio = priv->params.default_vlan_prio;
2660 
2661 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2662 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2663 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2664 }
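
/*
 * Worked example of the sizing above: with num_comp_vectors == 16,
 * order_base_2(16) == 4, so rx_hash_log_tbl_sz is the larger of 4 and
 * MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ, and
 * queue_mapping_channel_mask is roundup_pow_of_two(16) - 1 == 0xf.
 */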
2665 
2666 static int
2667 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2668     struct mlx5_core_mr *mr)
2669 {
2670 	struct ifnet *ifp = priv->ifp;
2671 	struct mlx5_core_dev *mdev = priv->mdev;
2672 	struct mlx5_create_mkey_mbox_in *in;
2673 	int err;
2674 
2675 	in = mlx5_vzalloc(sizeof(*in));
2676 	if (in == NULL) {
2677 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2678 		return (-ENOMEM);
2679 	}
2680 	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2681 	    MLX5_PERM_LOCAL_READ |
2682 	    MLX5_ACCESS_MODE_PA;
2683 	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2684 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2685 
2686 	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2687 	    NULL);
2688 	if (err)
2689 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2690 		    __func__, err);
2691 
2692 	kvfree(in);
2693 
2694 	return (err);
2695 }
2696 
2697 static const char *mlx5e_vport_stats_desc[] = {
2698 	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
2699 };
2700 
2701 static const char *mlx5e_pport_stats_desc[] = {
2702 	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
2703 };
2704 
2705 static void
2706 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
2707 {
2708 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
2709 	sx_init(&priv->state_lock, "mlx5state");
2710 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
2711 }
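
/*
 * Locking note (inferred from the callers above): "state_lock" backs
 * PRIV_LOCK()/PRIV_UNLOCK() and serializes the open/close and ioctl
 * paths, which may sleep; "async_events_mtx" protects firmware event
 * state and, via callout_init_mtx(), the watchdog callout.
 */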
2712 
2713 static void
2714 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
2715 {
2716 	mtx_destroy(&priv->async_events_mtx);
2717 	sx_destroy(&priv->state_lock);
2718 }
2719 
2720 static int
2721 sysctl_firmware(SYSCTL_HANDLER_ARGS)
2722 {
2723 	/*
2724 	 * %d.%d.%d is the string format.
2725 	 * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
2726 	 * We need at most 5 chars to store that.
2727 	 * It also has two "." and a terminating NUL, which means we need
2728 	 * 18 (5*3 + 3) chars at most.
2729 	 */
2730 	char fw[18];
2731 	struct mlx5e_priv *priv = arg1;
2732 	int error;
2733 
2734 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
2735 	    fw_rev_sub(priv->mdev));
2736 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
2737 	return (error);
2738 }
2739 
2740 static void
2741 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
2742 {
2743 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2744 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
2745 	    sysctl_firmware, "A", "HCA firmware version");
2746 
2747 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2748 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
2749 	    "Board ID");
2750 }
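
/*
 * These OIDs appear under the parent PCI device's sysctl tree; the exact
 * path depends on how the core driver attached, e.g. something like
 * "sysctl dev.mlx5_core.0.hw.fw_version" (the name and unit here are
 * illustrative, not guaranteed).
 */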
2751 
2752 static void *
2753 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
2754 {
2755 	static volatile int mlx5_en_unit;
2756 	struct ifnet *ifp;
2757 	struct mlx5e_priv *priv;
2758 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
2759 	struct sysctl_oid_list *child;
2760 	int ncv = mdev->priv.eq_table.num_comp_vectors;
2761 	char unit[16];
2762 	int err;
2763 	int i;
2764 	u32 eth_proto_cap;
2765 
2766 	if (mlx5e_check_required_hca_cap(mdev)) {
2767 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
2768 		return (NULL);
2769 	}
2770 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
2771 	if (priv == NULL) {
2772 		mlx5_core_err(mdev, "malloc() failed\n");
2773 		return (NULL);
2774 	}
2775 	mlx5e_priv_mtx_init(priv);
2776 
2777 	ifp = priv->ifp = if_alloc(IFT_ETHER);
2778 	if (ifp == NULL) {
2779 		mlx5_core_err(mdev, "if_alloc() failed\n");
2780 		goto err_free_priv;
2781 	}
2782 	ifp->if_softc = priv;
2783 	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
2784 	ifp->if_mtu = ETHERMTU;
2785 	ifp->if_init = mlx5e_open;
2786 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2787 	ifp->if_ioctl = mlx5e_ioctl;
2788 	ifp->if_transmit = mlx5e_xmit;
2789 	ifp->if_qflush = if_qflush;
2790 #if (__FreeBSD_version >= 1100000)
2791 	ifp->if_get_counter = mlx5e_get_counter;
2792 #endif
2793 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
2794 	/*
2795 	 * Set driver features
2796 	 */
2797 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
2798 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
2799 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
2800 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
2801 	ifp->if_capabilities |= IFCAP_LRO;
2802 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
2803 
2804 	/* set TSO limits so that we don't have to drop TX packets */
2805 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2806 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
2807 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
2808 
2809 	ifp->if_capenable = ifp->if_capabilities;
2810 	ifp->if_hwassist = 0;
2811 	if (ifp->if_capenable & IFCAP_TSO)
2812 		ifp->if_hwassist |= CSUM_TSO;
2813 	if (ifp->if_capenable & IFCAP_TXCSUM)
2814 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2815 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2816 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2817 
2818 	/* ifnet sysctl tree */
2819 	sysctl_ctx_init(&priv->sysctl_ctx);
2820 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
2821 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
2822 	if (priv->sysctl_ifnet == NULL) {
2823 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2824 		goto err_free_sysctl;
2825 	}
2826 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
2827 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2828 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
2829 	if (priv->sysctl_ifnet == NULL) {
2830 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2831 		goto err_free_sysctl;
2832 	}
2833 
2834 	/* HW sysctl tree */
2835 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
2836 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
2837 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
2838 	if (priv->sysctl_hw == NULL) {
2839 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2840 		goto err_free_sysctl;
2841 	}
2842 	mlx5e_build_ifp_priv(mdev, priv, ncv);
2843 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
2844 	if (err) {
2845 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
2846 		    __func__, err);
2847 		goto err_free_sysctl;
2848 	}
2849 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
2850 	if (err) {
2851 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
2852 		    __func__, err);
2853 		goto err_unmap_free_uar;
2854 	}
2855 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
2856 	if (err) {
2857 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
2858 		    __func__, err);
2859 		goto err_dealloc_pd;
2860 	}
2861 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
2862 	if (err) {
2863 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
2864 		    __func__, err);
2865 		goto err_dealloc_transport_domain;
2866 	}
2867 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
2868 
2869 	/* set default MTU */
2870 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
2871 
2872 	/* Set desc */
2873 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
2874 
2875 	/* Set default media status */
2876 	priv->media_status_last = IFM_AVALID;
2877 	priv->media_active_last = IFM_ETHER | IFM_AUTO;
2878 
2879 	/* Pauseframes are enabled by default */
2880 	priv->params_ethtool.tx_pauseframe_control = 1;
2881 	priv->params_ethtool.rx_pauseframe_control = 1;
2882 
2883 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
2884 	if (err) {
2885 		eth_proto_cap = 0;
2886 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
2887 		    __func__, err);
2888 	}
2889 
2890 	/* Setup supported medias */
2891 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
2892 	    mlx5e_media_change, mlx5e_media_status);
2893 
2894 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
2895 		if (mlx5e_mode_table[i].baudrate == 0)
2896 			continue;
2897 		if (MLX5E_PROT_MASK(i) & eth_proto_cap)
2898 			ifmedia_add(&priv->media,
2899 			    IFM_ETHER | mlx5e_mode_table[i].subtype |
2900 			    IFM_FDX, 0, NULL);
2901 	}
2902 
2903 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2904 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO);
2905 	ether_ifattach(ifp, dev_addr);
2906 
2907 	/* Register for VLAN events */
2908 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
2909 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
2910 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
2911 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
2912 
2913 	/* Link is down by default */
2914 	if_link_state_change(ifp, LINK_STATE_DOWN);
2915 
2916 	mlx5e_enable_async_events(priv);
2917 
2918 	mlx5e_add_hw_stats(priv);
2919 
2920 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2921 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
2922 	    priv->stats.vport.arg);
2923 
2924 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2925 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
2926 	    priv->stats.pport.arg);
2927 
2928 	mlx5e_create_ethtool(priv);
2929 
2930 	mtx_lock(&priv->async_events_mtx);
2931 	mlx5e_update_stats(priv);
2932 	mtx_unlock(&priv->async_events_mtx);
2933 
2934 	return (priv);
2935 
2936 err_dealloc_transport_domain:
2937 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
2938 
2939 err_dealloc_pd:
2940 	mlx5_core_dealloc_pd(mdev, priv->pdn);
2941 
2942 err_unmap_free_uar:
2943 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
2944 
2945 err_free_sysctl:
2946 	sysctl_ctx_free(&priv->sysctl_ctx);
2947 
2948 	if_free(ifp);
2949 
2950 err_free_priv:
2951 	mlx5e_priv_mtx_destroy(priv);
2952 	free(priv, M_MLX5EN);
2953 	return (NULL);
2954 }
2955 
2956 static void
2957 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
2958 {
2959 	struct mlx5e_priv *priv = vpriv;
2960 	struct ifnet *ifp = priv->ifp;
2961 
2962 	/* don't allow more IOCTLs */
2963 	priv->gone = 1;
2964 
2965 	/* XXX wait a bit to allow IOCTL handlers to complete */
2966 	pause("W", hz);
2967 
2968 	/* stop watchdog timer */
2969 	callout_drain(&priv->watchdog);
2970 
2971 	if (priv->vlan_attach != NULL)
2972 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
2973 	if (priv->vlan_detach != NULL)
2974 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
2975 
2976 	/* make sure device gets closed */
2977 	PRIV_LOCK(priv);
2978 	mlx5e_close_locked(ifp);
2979 	PRIV_UNLOCK(priv);
2980 
2981 	/* unregister device */
2982 	ifmedia_removeall(&priv->media);
2983 	ether_ifdetach(ifp);
2984 	if_free(ifp);
2985 
2986 	/* destroy all remaining sysctl nodes */
2987 	if (priv->sysctl_debug)
2988 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
2989 	sysctl_ctx_free(&priv->stats.vport.ctx);
2990 	sysctl_ctx_free(&priv->stats.pport.ctx);
2991 	sysctl_ctx_free(&priv->sysctl_ctx);
2992 
2993 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
2994 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
2995 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
2996 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
2997 	mlx5e_disable_async_events(priv);
2998 	flush_scheduled_work();
2999 	mlx5e_priv_mtx_destroy(priv);
3000 	free(priv, M_MLX5EN);
3001 }
3002 
3003 static void *
3004 mlx5e_get_ifp(void *vpriv)
3005 {
3006 	struct mlx5e_priv *priv = vpriv;
3007 
3008 	return (priv->ifp);
3009 }
3010 
3011 static struct mlx5_interface mlx5e_interface = {
3012 	.add = mlx5e_create_ifp,
3013 	.remove = mlx5e_destroy_ifp,
3014 	.event = mlx5e_async_event,
3015 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3016 	.get_dev = mlx5e_get_ifp,
3017 };
3018 
3019 void
3020 mlx5e_init(void)
3021 {
3022 	mlx5_register_interface(&mlx5e_interface);
3023 }
3024 
3025 void
3026 mlx5e_cleanup(void)
3027 {
3028 	mlx5_unregister_interface(&mlx5e_interface);
3029 }
3030 
3031 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3032 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3033 
3034 #if (__FreeBSD_version >= 1100000)
3035 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3036 #endif
3037 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3038 MODULE_VERSION(mlx5en, 1);
3039