1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "en.h"
29 
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32 
33 #define	ETH_DRIVER_VERSION	"3.1.0-dev"
34 char mlx5e_version[] = "Mellanox Ethernet driver"
35     " (" ETH_DRIVER_VERSION ")";
36 
37 struct mlx5e_rq_param {
38 	u32	rqc [MLX5_ST_SZ_DW(rqc)];
39 	struct mlx5_wq_param wq;
40 };
41 
42 struct mlx5e_sq_param {
43 	u32	sqc [MLX5_ST_SZ_DW(sqc)];
44 	struct mlx5_wq_param wq;
45 };
46 
47 struct mlx5e_cq_param {
48 	u32	cqc [MLX5_ST_SZ_DW(cqc)];
49 	struct mlx5_wq_param wq;
50 	u16	eq_ix;
51 };
52 
53 struct mlx5e_channel_param {
54 	struct mlx5e_rq_param rq;
55 	struct mlx5e_sq_param sq;
56 	struct mlx5e_cq_param rx_cq;
57 	struct mlx5e_cq_param tx_cq;
58 };
59 
60 static const struct {
61 	u32	subtype;
62 	u64	baudrate;
63 }	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
64 
65 	[MLX5E_1000BASE_CX_SGMII] = {
66 		.subtype = IFM_1000_CX_SGMII,
67 		.baudrate = IF_Mbps(1000ULL),
68 	},
69 	[MLX5E_1000BASE_KX] = {
70 		.subtype = IFM_1000_KX,
71 		.baudrate = IF_Mbps(1000ULL),
72 	},
73 	[MLX5E_10GBASE_CX4] = {
74 		.subtype = IFM_10G_CX4,
75 		.baudrate = IF_Gbps(10ULL),
76 	},
77 	[MLX5E_10GBASE_KX4] = {
78 		.subtype = IFM_10G_KX4,
79 		.baudrate = IF_Gbps(10ULL),
80 	},
81 	[MLX5E_10GBASE_KR] = {
82 		.subtype = IFM_10G_KR,
83 		.baudrate = IF_Gbps(10ULL),
84 	},
85 	[MLX5E_20GBASE_KR2] = {
86 		.subtype = IFM_20G_KR2,
87 		.baudrate = IF_Gbps(20ULL),
88 	},
89 	[MLX5E_40GBASE_CR4] = {
90 		.subtype = IFM_40G_CR4,
91 		.baudrate = IF_Gbps(40ULL),
92 	},
93 	[MLX5E_40GBASE_KR4] = {
94 		.subtype = IFM_40G_KR4,
95 		.baudrate = IF_Gbps(40ULL),
96 	},
97 	[MLX5E_56GBASE_R4] = {
98 		.subtype = IFM_56G_R4,
99 		.baudrate = IF_Gbps(56ULL),
100 	},
101 	[MLX5E_10GBASE_CR] = {
102 		.subtype = IFM_10G_CR1,
103 		.baudrate = IF_Gbps(10ULL),
104 	},
105 	[MLX5E_10GBASE_SR] = {
106 		.subtype = IFM_10G_SR,
107 		.baudrate = IF_Gbps(10ULL),
108 	},
109 	[MLX5E_10GBASE_LR] = {
110 		.subtype = IFM_10G_LR,
111 		.baudrate = IF_Gbps(10ULL),
112 	},
113 	[MLX5E_40GBASE_SR4] = {
114 		.subtype = IFM_40G_SR4,
115 		.baudrate = IF_Gbps(40ULL),
116 	},
117 	[MLX5E_40GBASE_LR4] = {
118 		.subtype = IFM_40G_LR4,
119 		.baudrate = IF_Gbps(40ULL),
120 	},
121 	[MLX5E_100GBASE_CR4] = {
122 		.subtype = IFM_100G_CR4,
123 		.baudrate = IF_Gbps(100ULL),
124 	},
125 	[MLX5E_100GBASE_SR4] = {
126 		.subtype = IFM_100G_SR4,
127 		.baudrate = IF_Gbps(100ULL),
128 	},
129 	[MLX5E_100GBASE_KR4] = {
130 		.subtype = IFM_100G_KR4,
131 		.baudrate = IF_Gbps(100ULL),
132 	},
133 	[MLX5E_100GBASE_LR4] = {
134 		.subtype = IFM_100G_LR4,
135 		.baudrate = IF_Gbps(100ULL),
136 	},
137 	[MLX5E_100BASE_TX] = {
138 		.subtype = IFM_100_TX,
139 		.baudrate = IF_Mbps(100ULL),
140 	},
141 	[MLX5E_100BASE_T] = {
142 		.subtype = IFM_100_T,
143 		.baudrate = IF_Mbps(100ULL),
144 	},
145 	[MLX5E_10GBASE_T] = {
146 		.subtype = IFM_10G_T,
147 		.baudrate = IF_Gbps(10ULL),
148 	},
149 	[MLX5E_25GBASE_CR] = {
150 		.subtype = IFM_25G_CR,
151 		.baudrate = IF_Gbps(25ULL),
152 	},
153 	[MLX5E_25GBASE_KR] = {
154 		.subtype = IFM_25G_KR,
155 		.baudrate = IF_Gbps(25ULL),
156 	},
157 	[MLX5E_25GBASE_SR] = {
158 		.subtype = IFM_25G_SR,
159 		.baudrate = IF_Gbps(25ULL),
160 	},
161 	[MLX5E_50GBASE_CR2] = {
162 		.subtype = IFM_50G_CR2,
163 		.baudrate = IF_Gbps(50ULL),
164 	},
165 	[MLX5E_50GBASE_KR2] = {
166 		.subtype = IFM_50G_KR2,
167 		.baudrate = IF_Gbps(50ULL),
168 	},
169 };
170 
171 MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
172 
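/*
 * Query the current vport state and, when the link is up, read the
 * PTYS register to find the operational link mode. A matching entry in
 * mlx5e_mode_table supplies the baudrate and ifmedia subtype that are
 * reported to the network stack.
 */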
173 static void
174 mlx5e_update_carrier(struct mlx5e_priv *priv)
175 {
176 	struct mlx5_core_dev *mdev = priv->mdev;
177 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
178 	u32 eth_proto_oper;
179 	int error;
180 	u8 port_state;
181 	u8 i;
182 
183 	port_state = mlx5_query_vport_state(mdev,
184 	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);
185 
186 	if (port_state == VPORT_STATE_UP) {
187 		priv->media_status_last |= IFM_ACTIVE;
188 	} else {
189 		priv->media_status_last &= ~IFM_ACTIVE;
190 		priv->media_active_last = IFM_ETHER;
191 		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
192 		return;
193 	}
194 
195 	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
196 	if (error) {
197 		priv->media_active_last = IFM_ETHER;
198 		priv->ifp->if_baudrate = 1;
199 		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
200 		    __func__, error);
201 		return;
202 	}
203 	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
204 
205 	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
206 		if (mlx5e_mode_table[i].baudrate == 0)
207 			continue;
208 		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
209 			priv->ifp->if_baudrate =
210 			    mlx5e_mode_table[i].baudrate;
211 			priv->media_active_last =
212 			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
213 		}
214 	}
215 	if_link_state_change(priv->ifp, LINK_STATE_UP);
216 }
217 
218 static void
219 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
220 {
221 	struct mlx5e_priv *priv = dev->if_softc;
222 
223 	ifmr->ifm_status = priv->media_status_last;
224 	ifmr->ifm_active = priv->media_active_last |
225 	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
226 	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
228 }
229 
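/*
 * Translate an ifmedia subtype into a PTYS protocol bitmask. More than
 * one bit may be set when several hardware link modes map to the same
 * ifmedia subtype.
 */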
230 static u32
231 mlx5e_find_link_mode(u32 subtype)
232 {
233 	u32 i;
234 	u32 link_mode = 0;
235 
236 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
237 		if (mlx5e_mode_table[i].baudrate == 0)
238 			continue;
239 		if (mlx5e_mode_table[i].subtype == subtype)
240 			link_mode |= MLX5E_PROT_MASK(i);
241 	}
242 
243 	return (link_mode);
244 }
245 
246 static int
247 mlx5e_media_change(struct ifnet *dev)
248 {
249 	struct mlx5e_priv *priv = dev->if_softc;
250 	struct mlx5_core_dev *mdev = priv->mdev;
251 	u32 eth_proto_cap;
252 	u32 link_mode;
253 	int was_opened;
254 	int locked;
255 	int error;
256 
257 	locked = PRIV_LOCKED(priv);
258 	if (!locked)
259 		PRIV_LOCK(priv);
260 
261 	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
262 		error = EINVAL;
263 		goto done;
264 	}
265 	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
266 
267 	/* query supported capabilities */
268 	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
269 	if (error != 0) {
270 		if_printf(dev, "Query port media capability failed\n");
271 		goto done;
272 	}
273 	/* check for autoselect */
274 	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
275 		link_mode = eth_proto_cap;
276 		if (link_mode == 0) {
277 			if_printf(dev, "Port media capability is zero\n");
278 			error = EINVAL;
279 			goto done;
280 		}
281 	} else {
282 		link_mode = link_mode & eth_proto_cap;
283 		if (link_mode == 0) {
284 			if_printf(dev, "Unsupported link mode requested\n");
285 			error = EINVAL;
286 			goto done;
287 		}
288 	}
289 	/* update pauseframe control bits */
290 	priv->params.rx_pauseframe_control =
291 	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
292 	priv->params.tx_pauseframe_control =
293 	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
294 
295 	/* check if device is opened */
296 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
297 
298 	/* reconfigure the hardware */
299 	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
300 	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
301 	mlx5_set_port_pause(mdev, 1,
302 	    priv->params.rx_pauseframe_control,
303 	    priv->params.tx_pauseframe_control);
304 	if (was_opened)
305 		mlx5_set_port_status(mdev, MLX5_PORT_UP);
306 
307 done:
308 	if (!locked)
309 		PRIV_UNLOCK(priv);
310 	return (error);
311 }
312 
313 static void
314 mlx5e_update_carrier_work(struct work_struct *work)
315 {
316 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
317 	    update_carrier_work);
318 
319 	PRIV_LOCK(priv);
320 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
321 		mlx5e_update_carrier(priv);
322 	PRIV_UNLOCK(priv);
323 }
324 
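/*
 * Read the per-port PPCNT register once for each counter group (IEEE
 * 802.3, RFC 2819, RFC 2863 and physical layer) and store the
 * big-endian counter values into the driver's statistics arrays.
 */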
325 static void
326 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
327 {
328 	struct mlx5_core_dev *mdev = priv->mdev;
329 	struct mlx5e_pport_stats *s = &priv->stats.pport;
330 	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
331 	u32 *in;
332 	u32 *out;
333 	u64 *ptr;
334 	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
335 	unsigned x;
336 	unsigned y;
337 
338 	in = mlx5_vzalloc(sz);
339 	out = mlx5_vzalloc(sz);
340 	if (in == NULL || out == NULL)
341 		goto free_out;
342 
343 	ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
344 
345 	MLX5_SET(ppcnt_reg, in, local_port, 1);
346 
347 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
348 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
349 	for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
350 		s->arg[y] = be64toh(ptr[x]);
351 
352 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
353 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
354 	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
355 		s->arg[y] = be64toh(ptr[x]);
356 	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
357 	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
358 		s_debug->arg[y] = be64toh(ptr[x]);
359 
360 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
361 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
362 	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
363 		s_debug->arg[y] = be64toh(ptr[x]);
364 
365 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
366 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
367 	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
368 		s_debug->arg[y] = be64toh(ptr[x]);
369 free_out:
370 	kvfree(in);
371 	kvfree(out);
372 }
373 
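/*
 * Aggregate the software ring counters from all channels first and
 * only then query the hardware vport counters, so that the derived
 * totals remain consistent. This work is scheduled once per second by
 * the watchdog callout below.
 */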
374 static void
375 mlx5e_update_stats_work(struct work_struct *work)
376 {
377 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
378 	    update_stats_work);
379 	struct mlx5_core_dev *mdev = priv->mdev;
380 	struct mlx5e_vport_stats *s = &priv->stats.vport;
381 	struct mlx5e_rq_stats *rq_stats;
382 	struct mlx5e_sq_stats *sq_stats;
383 	struct buf_ring *sq_br;
384 #if (__FreeBSD_version < 1100000)
385 	struct ifnet *ifp = priv->ifp;
386 #endif
387 
388 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
389 	u32 *out;
390 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
391 	u64 tso_packets = 0;
392 	u64 tso_bytes = 0;
393 	u64 tx_queue_dropped = 0;
394 	u64 tx_defragged = 0;
395 	u64 tx_offload_none = 0;
396 	u64 lro_packets = 0;
397 	u64 lro_bytes = 0;
398 	u64 sw_lro_queued = 0;
399 	u64 sw_lro_flushed = 0;
400 	u64 rx_csum_none = 0;
401 	u64 rx_wqe_err = 0;
402 	u32 rx_out_of_buffer = 0;
403 	int i;
404 	int j;
405 
406 	PRIV_LOCK(priv);
407 	out = mlx5_vzalloc(outlen);
408 	if (out == NULL)
409 		goto free_out;
410 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
411 		goto free_out;
412 
413 	/* Collect SW counters first and then HW counters, for consistency */
414 	for (i = 0; i < priv->params.num_channels; i++) {
415 		struct mlx5e_rq *rq = &priv->channel[i]->rq;
416 
417 		rq_stats = &priv->channel[i]->rq.stats;
418 
419 		/* collect stats from LRO */
420 		rq_stats->sw_lro_queued = rq->lro.lro_queued;
421 		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
422 		sw_lro_queued += rq_stats->sw_lro_queued;
423 		sw_lro_flushed += rq_stats->sw_lro_flushed;
424 		lro_packets += rq_stats->lro_packets;
425 		lro_bytes += rq_stats->lro_bytes;
426 		rx_csum_none += rq_stats->csum_none;
427 		rx_wqe_err += rq_stats->wqe_err;
428 
429 		for (j = 0; j < priv->num_tc; j++) {
430 			sq_stats = &priv->channel[i]->sq[j].stats;
431 			sq_br = priv->channel[i]->sq[j].br;
432 
433 			tso_packets += sq_stats->tso_packets;
434 			tso_bytes += sq_stats->tso_bytes;
435 			tx_queue_dropped += sq_stats->dropped;
436 			tx_queue_dropped += sq_br->br_drops;
437 			tx_defragged += sq_stats->defragged;
438 			tx_offload_none += sq_stats->csum_offload_none;
439 		}
440 	}
441 
442 	/* update counters */
443 	s->tso_packets = tso_packets;
444 	s->tso_bytes = tso_bytes;
445 	s->tx_queue_dropped = tx_queue_dropped;
446 	s->tx_defragged = tx_defragged;
447 	s->lro_packets = lro_packets;
448 	s->lro_bytes = lro_bytes;
449 	s->sw_lro_queued = sw_lro_queued;
450 	s->sw_lro_flushed = sw_lro_flushed;
451 	s->rx_csum_none = rx_csum_none;
452 	s->rx_wqe_err = rx_wqe_err;
453 
454 	/* HW counters */
455 	memset(in, 0, sizeof(in));
456 
457 	MLX5_SET(query_vport_counter_in, in, opcode,
458 	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
459 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
460 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
461 
462 	memset(out, 0, outlen);
463 
464 	/* get number of out-of-buffer drops first */
465 	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
466 	    &rx_out_of_buffer))
467 		goto free_out;
468 
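	/*
	 * The out-of-buffer counter is only 32 bits wide, so the delta is
	 * computed with unsigned modular arithmetic, which stays correct
	 * across a wraparound; e.g. (u32)(0x00000003 - 0xFFFFFFFF) == 4.
	 */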
469 	/* accumulate difference into a 64-bit counter */
470 	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
471 	s->rx_out_of_buffer_prev = rx_out_of_buffer;
472 
473 	/* get port statistics */
474 	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
475 		goto free_out;
476 
477 #define	MLX5_GET_CTR(out, x) \
478 	MLX5_GET64(query_vport_counter_out, out, x)
479 
480 	s->rx_error_packets =
481 	    MLX5_GET_CTR(out, received_errors.packets);
482 	s->rx_error_bytes =
483 	    MLX5_GET_CTR(out, received_errors.octets);
484 	s->tx_error_packets =
485 	    MLX5_GET_CTR(out, transmit_errors.packets);
486 	s->tx_error_bytes =
487 	    MLX5_GET_CTR(out, transmit_errors.octets);
488 
489 	s->rx_unicast_packets =
490 	    MLX5_GET_CTR(out, received_eth_unicast.packets);
491 	s->rx_unicast_bytes =
492 	    MLX5_GET_CTR(out, received_eth_unicast.octets);
493 	s->tx_unicast_packets =
494 	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
495 	s->tx_unicast_bytes =
496 	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
497 
498 	s->rx_multicast_packets =
499 	    MLX5_GET_CTR(out, received_eth_multicast.packets);
500 	s->rx_multicast_bytes =
501 	    MLX5_GET_CTR(out, received_eth_multicast.octets);
502 	s->tx_multicast_packets =
503 	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
504 	s->tx_multicast_bytes =
505 	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
506 
507 	s->rx_broadcast_packets =
508 	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
509 	s->rx_broadcast_bytes =
510 	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
511 	s->tx_broadcast_packets =
512 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
513 	s->tx_broadcast_bytes =
514 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
515 
516 	s->rx_packets =
517 	    s->rx_unicast_packets +
518 	    s->rx_multicast_packets +
519 	    s->rx_broadcast_packets -
520 	    s->rx_out_of_buffer;
521 	s->rx_bytes =
522 	    s->rx_unicast_bytes +
523 	    s->rx_multicast_bytes +
524 	    s->rx_broadcast_bytes;
525 	s->tx_packets =
526 	    s->tx_unicast_packets +
527 	    s->tx_multicast_packets +
528 	    s->tx_broadcast_packets;
529 	s->tx_bytes =
530 	    s->tx_unicast_bytes +
531 	    s->tx_multicast_bytes +
532 	    s->tx_broadcast_bytes;
533 
534 	/* Update calculated offload counters */
535 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
536 	s->rx_csum_good = s->rx_packets - s->rx_csum_none;
537 
538 	/* Update per port counters */
539 	mlx5e_update_pport_counters(priv);
540 
541 #if (__FreeBSD_version < 1100000)
542 	/* FreeBSD 10 lacks the if_get_counter() interface */
543 	ifp->if_ipackets = s->rx_packets;
544 	ifp->if_ierrors = s->rx_error_packets;
545 	ifp->if_iqdrops = s->rx_out_of_buffer;
546 	ifp->if_opackets = s->tx_packets;
547 	ifp->if_oerrors = s->tx_error_packets;
548 	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
549 	ifp->if_ibytes = s->rx_bytes;
550 	ifp->if_obytes = s->tx_bytes;
551 #endif
552 
553 free_out:
554 	kvfree(out);
555 	PRIV_UNLOCK(priv);
556 }
557 
558 static void
559 mlx5e_update_stats(void *arg)
560 {
561 	struct mlx5e_priv *priv = arg;
562 
563 	schedule_work(&priv->update_stats_work);
564 
565 	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
566 }
567 
568 static void
569 mlx5e_async_event_sub(struct mlx5e_priv *priv,
570     enum mlx5_dev_event event)
571 {
572 	switch (event) {
573 	case MLX5_DEV_EVENT_PORT_UP:
574 	case MLX5_DEV_EVENT_PORT_DOWN:
575 		schedule_work(&priv->update_carrier_work);
576 		break;
577 
578 	default:
579 		break;
580 	}
581 }
582 
583 static void
584 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
585     enum mlx5_dev_event event, unsigned long param)
586 {
587 	struct mlx5e_priv *priv = vpriv;
588 
589 	mtx_lock(&priv->async_events_mtx);
590 	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
591 		mlx5e_async_event_sub(priv, event);
592 	mtx_unlock(&priv->async_events_mtx);
593 }
594 
595 static void
596 mlx5e_enable_async_events(struct mlx5e_priv *priv)
597 {
598 	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
599 }
600 
601 static void
602 mlx5e_disable_async_events(struct mlx5e_priv *priv)
603 {
604 	mtx_lock(&priv->async_events_mtx);
605 	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
606 	mtx_unlock(&priv->async_events_mtx);
607 }
608 
609 static const char *mlx5e_rq_stats_desc[] = {
610 	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
611 };
612 
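/*
 * Allocate the software state of a receive queue: a DMA tag sized for
 * the largest supported mbuf cluster, the descriptor work queue, the
 * LRO context, one DMA map per work queue entry, and the per-queue
 * statistics sysctl nodes. The WQE size is rounded up to the next
 * supported mbuf cluster size.
 */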
613 static int
614 mlx5e_create_rq(struct mlx5e_channel *c,
615     struct mlx5e_rq_param *param,
616     struct mlx5e_rq *rq)
617 {
618 	struct mlx5e_priv *priv = c->priv;
619 	struct mlx5_core_dev *mdev = priv->mdev;
620 	char buffer[16];
621 	void *rqc = param->rqc;
622 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
623 	int wq_sz;
624 	int err;
625 	int i;
626 
627 	/* Create DMA descriptor TAG */
628 	if ((err = -bus_dma_tag_create(
629 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
630 	    1,				/* any alignment */
631 	    0,				/* no boundary */
632 	    BUS_SPACE_MAXADDR,		/* lowaddr */
633 	    BUS_SPACE_MAXADDR,		/* highaddr */
634 	    NULL, NULL,			/* filter, filterarg */
635 	    MJUM16BYTES,		/* maxsize */
636 	    1,				/* nsegments */
637 	    MJUM16BYTES,		/* maxsegsize */
638 	    0,				/* flags */
639 	    NULL, NULL,			/* lockfunc, lockfuncarg */
640 	    &rq->dma_tag)))
641 		goto done;
642 
643 	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
644 	    &rq->wq_ctrl);
645 	if (err)
646 		goto err_free_dma_tag;
647 
648 	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
649 
650 	if (priv->params.hw_lro_en) {
651 		rq->wqe_sz = priv->params.lro_wqe_sz;
652 	} else {
653 		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
654 	}
655 	if (rq->wqe_sz > MJUM16BYTES) {
656 		err = -ENOMEM;
657 		goto err_rq_wq_destroy;
658 	} else if (rq->wqe_sz > MJUM9BYTES) {
659 		rq->wqe_sz = MJUM16BYTES;
660 	} else if (rq->wqe_sz > MJUMPAGESIZE) {
661 		rq->wqe_sz = MJUM9BYTES;
662 	} else if (rq->wqe_sz > MCLBYTES) {
663 		rq->wqe_sz = MJUMPAGESIZE;
664 	} else {
665 		rq->wqe_sz = MCLBYTES;
666 	}
667 
668 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
669 
670 	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
671 	if (err)
672 		goto err_rq_wq_destroy;
673 
674 	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
675 	if (rq->mbuf == NULL) {
676 		err = -ENOMEM;
677 		goto err_lro_init;
678 	}
679 	for (i = 0; i != wq_sz; i++) {
680 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
681 		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
682 
683 		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
684 		if (err != 0) {
685 			while (i--)
686 				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
687 			goto err_rq_mbuf_free;
688 		}
689 		wqe->data.lkey = c->mkey_be;
690 		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
691 	}
692 
693 	rq->pdev = c->pdev;
694 	rq->ifp = c->ifp;
695 	rq->channel = c;
696 	rq->ix = c->ix;
697 
698 	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
699 	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
700 	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
701 	    rq->stats.arg);
702 	return (0);
703 
704 err_rq_mbuf_free:
705 	free(rq->mbuf, M_MLX5EN);
706 err_lro_init:
707 	tcp_lro_free(&rq->lro);
708 err_rq_wq_destroy:
709 	mlx5_wq_destroy(&rq->wq_ctrl);
710 err_free_dma_tag:
711 	bus_dma_tag_destroy(rq->dma_tag);
712 done:
713 	return (err);
714 }
715 
716 static void
717 mlx5e_destroy_rq(struct mlx5e_rq *rq)
718 {
719 	int wq_sz;
720 	int i;
721 
722 	/* destroy all sysctl nodes */
723 	sysctl_ctx_free(&rq->stats.ctx);
724 
725 	/* free leftover LRO packets, if any */
726 	tcp_lro_free(&rq->lro);
727 
728 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
729 	for (i = 0; i != wq_sz; i++) {
730 		if (rq->mbuf[i].mbuf != NULL) {
731 			bus_dmamap_unload(rq->dma_tag,
732 			    rq->mbuf[i].dma_map);
733 			m_freem(rq->mbuf[i].mbuf);
734 		}
735 		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
736 	}
737 	free(rq->mbuf, M_MLX5EN);
738 	mlx5_wq_destroy(&rq->wq_ctrl);
739 }
740 
741 static int
742 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
743 {
744 	struct mlx5e_channel *c = rq->channel;
745 	struct mlx5e_priv *priv = c->priv;
746 	struct mlx5_core_dev *mdev = priv->mdev;
747 
748 	void *in;
749 	void *rqc;
750 	void *wq;
751 	int inlen;
752 	int err;
753 
754 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
755 	    sizeof(u64) * rq->wq_ctrl.buf.npages;
756 	in = mlx5_vzalloc(inlen);
757 	if (in == NULL)
758 		return (-ENOMEM);
759 
760 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
761 	wq = MLX5_ADDR_OF(rqc, rqc, wq);
762 
763 	memcpy(rqc, param->rqc, sizeof(param->rqc));
764 
765 	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
766 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
767 	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
768 	if (priv->counter_set_id >= 0)
769 		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
770 	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
771 	    PAGE_SHIFT);
772 	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
773 
774 	mlx5_fill_page_array(&rq->wq_ctrl.buf,
775 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
776 
777 	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
778 
779 	kvfree(in);
780 
781 	return (err);
782 }
783 
784 static int
785 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
786 {
787 	struct mlx5e_channel *c = rq->channel;
788 	struct mlx5e_priv *priv = c->priv;
789 	struct mlx5_core_dev *mdev = priv->mdev;
790 
791 	void *in;
792 	void *rqc;
793 	int inlen;
794 	int err;
795 
796 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
797 	in = mlx5_vzalloc(inlen);
798 	if (in == NULL)
799 		return (-ENOMEM);
800 
801 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
802 
803 	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
804 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
805 	MLX5_SET(rqc, rqc, state, next_state);
806 
807 	err = mlx5_core_modify_rq(mdev, in, inlen);
808 
809 	kvfree(in);
810 
811 	return (err);
812 }
813 
814 static void
815 mlx5e_disable_rq(struct mlx5e_rq *rq)
816 {
817 	struct mlx5e_channel *c = rq->channel;
818 	struct mlx5e_priv *priv = c->priv;
819 	struct mlx5_core_dev *mdev = priv->mdev;
820 
821 	mlx5_core_destroy_rq(mdev, rq->rqn);
822 }
823 
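/*
 * Poll until at least "min_rx_wqes" receive WQEs have been posted,
 * sleeping 4 milliseconds per iteration for at most 1000 iterations,
 * i.e. roughly four seconds, before timing out.
 */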
824 static int
825 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
826 {
827 	struct mlx5e_channel *c = rq->channel;
828 	struct mlx5e_priv *priv = c->priv;
829 	struct mlx5_wq_ll *wq = &rq->wq;
830 	int i;
831 
832 	for (i = 0; i < 1000; i++) {
833 		if (wq->cur_sz >= priv->params.min_rx_wqes)
834 			return (0);
835 
836 		msleep(4);
837 	}
838 	return (-ETIMEDOUT);
839 }
840 
841 static int
842 mlx5e_open_rq(struct mlx5e_channel *c,
843     struct mlx5e_rq_param *param,
844     struct mlx5e_rq *rq)
845 {
846 	int err;
847 
848 	err = mlx5e_create_rq(c, param, rq);
849 	if (err)
850 		return (err);
851 
852 	err = mlx5e_enable_rq(rq, param);
853 	if (err)
854 		goto err_destroy_rq;
855 
856 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
857 	if (err)
858 		goto err_disable_rq;
859 
860 	c->rq.enabled = 1;
861 
862 	return (0);
863 
864 err_disable_rq:
865 	mlx5e_disable_rq(rq);
866 err_destroy_rq:
867 	mlx5e_destroy_rq(rq);
868 
869 	return (err);
870 }
871 
872 static void
873 mlx5e_close_rq(struct mlx5e_rq *rq)
874 {
875 	rq->enabled = 0;
876 	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
877 }
878 
879 static void
880 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
881 {
882 	/* wait till RQ is empty */
883 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
884 		msleep(4);
885 		rq->cq.mcq.comp(&rq->cq.mcq);
886 	}
887 
888 	mlx5e_disable_rq(rq);
889 	mlx5e_destroy_rq(rq);
890 }
891 
892 static void
893 mlx5e_free_sq_db(struct mlx5e_sq *sq)
894 {
895 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
896 	int x;
897 
898 	for (x = 0; x != wq_sz; x++)
899 		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
900 	free(sq->mbuf, M_MLX5EN);
901 }
902 
903 static int
904 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
905 {
906 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
907 	int err;
908 	int x;
909 
910 	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
911 	if (sq->mbuf == NULL)
912 		return (-ENOMEM);
913 
914 	/* Create DMA descriptor MAPs */
915 	for (x = 0; x != wq_sz; x++) {
916 		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
917 		if (err != 0) {
918 			while (x--)
919 				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
920 			free(sq->mbuf, M_MLX5EN);
921 			return (err);
922 		}
923 	}
924 	return (0);
925 }
926 
927 static const char *mlx5e_sq_stats_desc[] = {
928 	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
929 };
930 
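/*
 * Allocate the software state of a send queue: a DMA tag and per-entry
 * DMA maps, a UAR for doorbell and blueflame writes, the cyclic work
 * queue, a buf_ring holding the transmit backlog, and a dedicated fast
 * taskqueue, which is pinned to a CPU when the kernel RSS option is
 * enabled.
 */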
931 static int
932 mlx5e_create_sq(struct mlx5e_channel *c,
933     int tc,
934     struct mlx5e_sq_param *param,
935     struct mlx5e_sq *sq)
936 {
937 	struct mlx5e_priv *priv = c->priv;
938 	struct mlx5_core_dev *mdev = priv->mdev;
939 	char buffer[16];
940 
941 	void *sqc = param->sqc;
942 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
943 #ifdef RSS
944 	cpuset_t cpu_mask;
945 	int cpu_id;
946 #endif
947 	int err;
948 
949 	/* Create DMA descriptor TAG */
950 	if ((err = -bus_dma_tag_create(
951 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
952 	    1,				/* any alignment */
953 	    0,				/* no boundary */
954 	    BUS_SPACE_MAXADDR,		/* lowaddr */
955 	    BUS_SPACE_MAXADDR,		/* highaddr */
956 	    NULL, NULL,			/* filter, filterarg */
957 	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
958 	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
959 	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
960 	    0,				/* flags */
961 	    NULL, NULL,			/* lockfunc, lockfuncarg */
962 	    &sq->dma_tag)))
963 		goto done;
964 
965 	err = mlx5_alloc_map_uar(mdev, &sq->uar);
966 	if (err)
967 		goto err_free_dma_tag;
968 
969 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
970 	    &sq->wq_ctrl);
971 	if (err)
972 		goto err_unmap_free_uar;
973 
974 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
975 	sq->uar_map = sq->uar.map;
976 	sq->uar_bf_map = sq->uar.bf_map;
977 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
978 
979 	err = mlx5e_alloc_sq_db(sq);
980 	if (err)
981 		goto err_sq_wq_destroy;
982 
983 	sq->pdev = c->pdev;
984 	sq->mkey_be = c->mkey_be;
985 	sq->channel = c;
986 	sq->tc = tc;
987 
988 	sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
989 	    M_WAITOK, &sq->lock);
990 	if (sq->br == NULL) {
991 		if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
992 		    __func__);
993 		err = -ENOMEM;
994 		goto err_free_sq_db;
995 	}
996 
997 	sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
998 	    taskqueue_thread_enqueue, &sq->sq_tq);
999 	if (sq->sq_tq == NULL) {
1000 		if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
1001 		    __func__);
1002 		err = -ENOMEM;
1003 		goto err_free_drbr;
1004 	}
1005 
1006 	TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
1007 #ifdef RSS
1008 	cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
1009 	CPU_SETOF(cpu_id, &cpu_mask);
1010 	taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
1011 	    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
1012 #else
1013 	taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
1014 	    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
1015 #endif
1016 	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1017 	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1018 	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1019 	    sq->stats.arg);
1020 
1021 	return (0);
1022 
1023 err_free_drbr:
1024 	buf_ring_free(sq->br, M_MLX5EN);
1025 err_free_sq_db:
1026 	mlx5e_free_sq_db(sq);
1027 err_sq_wq_destroy:
1028 	mlx5_wq_destroy(&sq->wq_ctrl);
1029 
1030 err_unmap_free_uar:
1031 	mlx5_unmap_free_uar(mdev, &sq->uar);
1032 
1033 err_free_dma_tag:
1034 	bus_dma_tag_destroy(sq->dma_tag);
1035 done:
1036 	return (err);
1037 }
1038 
1039 static void
1040 mlx5e_destroy_sq(struct mlx5e_sq *sq)
1041 {
1042 	struct mlx5e_channel *c = sq->channel;
1043 	struct mlx5e_priv *priv = c->priv;
1044 
1045 	/* destroy all sysctl nodes */
1046 	sysctl_ctx_free(&sq->stats.ctx);
1047 
1048 	mlx5e_free_sq_db(sq);
1049 	mlx5_wq_destroy(&sq->wq_ctrl);
1050 	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
1051 	taskqueue_drain(sq->sq_tq, &sq->sq_task);
1052 	taskqueue_free(sq->sq_tq);
1053 	buf_ring_free(sq->br, M_MLX5EN);
1054 }
1055 
1056 static int
1057 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
1058 {
1059 	struct mlx5e_channel *c = sq->channel;
1060 	struct mlx5e_priv *priv = c->priv;
1061 	struct mlx5_core_dev *mdev = priv->mdev;
1062 
1063 	void *in;
1064 	void *sqc;
1065 	void *wq;
1066 	int inlen;
1067 	int err;
1068 
1069 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1070 	    sizeof(u64) * sq->wq_ctrl.buf.npages;
1071 	in = mlx5_vzalloc(inlen);
1072 	if (in == NULL)
1073 		return (-ENOMEM);
1074 
1075 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1076 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
1077 
1078 	memcpy(sqc, param->sqc, sizeof(param->sqc));
1079 
1080 	MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]);
1081 	MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn);
1082 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1083 	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1084 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1085 
1086 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1087 	MLX5_SET(wq, wq, uar_page, sq->uar.index);
1088 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1089 	    PAGE_SHIFT);
1090 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1091 
1092 	mlx5_fill_page_array(&sq->wq_ctrl.buf,
1093 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1094 
1095 	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
1096 
1097 	kvfree(in);
1098 
1099 	return (err);
1100 }
1101 
1102 static int
1103 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1104 {
1105 	struct mlx5e_channel *c = sq->channel;
1106 	struct mlx5e_priv *priv = c->priv;
1107 	struct mlx5_core_dev *mdev = priv->mdev;
1108 
1109 	void *in;
1110 	void *sqc;
1111 	int inlen;
1112 	int err;
1113 
1114 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1115 	in = mlx5_vzalloc(inlen);
1116 	if (in == NULL)
1117 		return (-ENOMEM);
1118 
1119 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1120 
1121 	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1122 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1123 	MLX5_SET(sqc, sqc, state, next_state);
1124 
1125 	err = mlx5_core_modify_sq(mdev, in, inlen);
1126 
1127 	kvfree(in);
1128 
1129 	return (err);
1130 }
1131 
1132 static void
1133 mlx5e_disable_sq(struct mlx5e_sq *sq)
1134 {
1135 	struct mlx5e_channel *c = sq->channel;
1136 	struct mlx5e_priv *priv = c->priv;
1137 	struct mlx5_core_dev *mdev = priv->mdev;
1138 
1139 	mlx5_core_destroy_sq(mdev, sq->sqn);
1140 }
1141 
1142 static int
1143 mlx5e_open_sq(struct mlx5e_channel *c,
1144     int tc,
1145     struct mlx5e_sq_param *param,
1146     struct mlx5e_sq *sq)
1147 {
1148 	int err;
1149 
1150 	err = mlx5e_create_sq(c, tc, param, sq);
1151 	if (err)
1152 		return (err);
1153 
1154 	err = mlx5e_enable_sq(sq, param);
1155 	if (err)
1156 		goto err_destroy_sq;
1157 
1158 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1159 	if (err)
1160 		goto err_disable_sq;
1161 
1162 	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);
1163 
1164 	return (0);
1165 
1166 err_disable_sq:
1167 	mlx5e_disable_sq(sq);
1168 err_destroy_sq:
1169 	mlx5e_destroy_sq(sq);
1170 
1171 	return (err);
1172 }
1173 
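/*
 * Pad the send queue with NOP WQEs until the completion event counter
 * reaches zero, which guarantees that a completion event is eventually
 * generated even when no regular traffic is queued. When "can_sleep"
 * is set, wait for ring space with the SQ lock dropped; otherwise give
 * up and just ring the doorbell for whatever was posted.
 */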
1174 static void
1175 mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
1176 {
1177 	/* fill up remainder with NOPs */
1178 	while (sq->cev_counter != 0) {
1179 		while (!mlx5e_sq_has_room_for(sq, 1)) {
1180 			if (can_sleep != 0) {
1181 				mtx_unlock(&sq->lock);
1182 				msleep(4);
1183 				mtx_lock(&sq->lock);
1184 			} else {
1185 				goto done;
1186 			}
1187 		}
1188 		/* send a single NOP */
1189 		mlx5e_send_nop(sq, 1);
1190 		wmb();
1191 	}
1192 done:
1193 	/* Check if we need to write the doorbell */
1194 	if (likely(sq->doorbell.d64 != 0)) {
1195 		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
1196 		sq->doorbell.d64 = 0;
1197 	}
1198 	return;
1199 }
1200 
1201 void
1202 mlx5e_sq_cev_timeout(void *arg)
1203 {
1204 	struct mlx5e_sq *sq = arg;
1205 
1206 	mtx_assert(&sq->lock, MA_OWNED);
1207 
1208 	/* check next state */
1209 	switch (sq->cev_next_state) {
1210 	case MLX5E_CEV_STATE_SEND_NOPS:
1211 		/* fill TX ring with NOPs, if any */
1212 		mlx5e_sq_send_nops_locked(sq, 0);
1213 
1214 		/* check if completed */
1215 		if (sq->cev_counter == 0) {
1216 			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1217 			return;
1218 		}
1219 		break;
1220 	default:
1221 		/* send NOPs on next timeout */
1222 		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1223 		break;
1224 	}
1225 
1226 	/* restart timer */
1227 	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1228 }
1229 
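/*
 * Tear down a send queue in stages: stop the completion event callout,
 * flush the ring with NOPs, move the SQ to the error state so that
 * outstanding requests are completed, wait until the consumer counter
 * catches up with the producer counter, and finally destroy the
 * hardware and software state.
 */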
1230 static void
1231 mlx5e_close_sq_wait(struct mlx5e_sq *sq)
1232 {
1233 
1234 	mtx_lock(&sq->lock);
1235 	/* teardown event factor timer, if any */
1236 	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1237 	callout_stop(&sq->cev_callout);
1238 
1239 	/* send dummy NOPs in order to flush the transmit ring */
1240 	mlx5e_sq_send_nops_locked(sq, 1);
1241 	mtx_unlock(&sq->lock);
1242 
1243 	/* make sure it is safe to free the callout */
1244 	callout_drain(&sq->cev_callout);
1245 
1246 	/* error out remaining requests */
1247 	mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1248 
1249 	/* wait till SQ is empty */
1250 	mtx_lock(&sq->lock);
1251 	while (sq->cc != sq->pc) {
1252 		mtx_unlock(&sq->lock);
1253 		msleep(4);
1254 		sq->cq.mcq.comp(&sq->cq.mcq);
1255 		mtx_lock(&sq->lock);
1256 	}
1257 	mtx_unlock(&sq->lock);
1258 
1259 	mlx5e_disable_sq(sq);
1260 	mlx5e_destroy_sq(sq);
1261 }
1262 
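/*
 * Create the software state of a completion queue and bind it to the
 * EQ selected by the channel index. The op_own byte of every CQE is
 * preset to 0xf1 (invalid opcode, hardware ownership) so that stale
 * entries are never mistaken for valid completions.
 */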
1263 static int
1264 mlx5e_create_cq(struct mlx5e_channel *c,
1265     struct mlx5e_cq_param *param,
1266     struct mlx5e_cq *cq,
1267     mlx5e_cq_comp_t *comp)
1268 {
1269 	struct mlx5e_priv *priv = c->priv;
1270 	struct mlx5_core_dev *mdev = priv->mdev;
1271 	struct mlx5_core_cq *mcq = &cq->mcq;
1272 	int eqn_not_used;
1273 	int irqn;
1274 	int err;
1275 	u32 i;
1276 
1277 	param->wq.buf_numa_node = 0;
1278 	param->wq.db_numa_node = 0;
1279 	param->eq_ix = c->ix;
1280 
1281 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1282 	    &cq->wq_ctrl);
1283 	if (err)
1284 		return (err);
1285 
1286 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
1287 
1288 	mcq->cqe_sz = 64;
1289 	mcq->set_ci_db = cq->wq_ctrl.db.db;
1290 	mcq->arm_db = cq->wq_ctrl.db.db + 1;
1291 	*mcq->set_ci_db = 0;
1292 	*mcq->arm_db = 0;
1293 	mcq->vector = param->eq_ix;
1294 	mcq->comp = comp;
1295 	mcq->event = mlx5e_cq_error_event;
1296 	mcq->irqn = irqn;
1297 	mcq->uar = &priv->cq_uar;
1298 
1299 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1300 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1301 
1302 		cqe->op_own = 0xf1;
1303 	}
1304 
1305 	cq->channel = c;
1306 
1307 	return (0);
1308 }
1309 
1310 static void
1311 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1312 {
1313 	mlx5_wq_destroy(&cq->wq_ctrl);
1314 }
1315 
1316 static int
1317 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
1318     u8 moderation_mode)
1319 {
1320 	struct mlx5e_channel *c = cq->channel;
1321 	struct mlx5e_priv *priv = c->priv;
1322 	struct mlx5_core_dev *mdev = priv->mdev;
1323 	struct mlx5_core_cq *mcq = &cq->mcq;
1324 	void *in;
1325 	void *cqc;
1326 	int inlen;
1327 	int irqn_not_used;
1328 	int eqn;
1329 	int err;
1330 
1331 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1332 	    sizeof(u64) * cq->wq_ctrl.buf.npages;
1333 	in = mlx5_vzalloc(inlen);
1334 	if (in == NULL)
1335 		return (-ENOMEM);
1336 
1337 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1338 
1339 	memcpy(cqc, param->cqc, sizeof(param->cqc));
1340 
1341 	mlx5_fill_page_array(&cq->wq_ctrl.buf,
1342 	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1343 
1344 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
1345 
1346 	MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
1347 	MLX5_SET(cqc, cqc, c_eqn, eqn);
1348 	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1349 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1350 	    PAGE_SHIFT);
1351 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1352 
1353 	err = mlx5_core_create_cq(mdev, mcq, in, inlen);
1354 
1355 	kvfree(in);
1356 
1357 	if (err)
1358 		return (err);
1359 
1360 	mlx5e_cq_arm(cq);
1361 
1362 	return (0);
1363 }
1364 
1365 static void
1366 mlx5e_disable_cq(struct mlx5e_cq *cq)
1367 {
1368 	struct mlx5e_channel *c = cq->channel;
1369 	struct mlx5e_priv *priv = c->priv;
1370 	struct mlx5_core_dev *mdev = priv->mdev;
1371 
1372 	mlx5_core_destroy_cq(mdev, &cq->mcq);
1373 }
1374 
1375 static int
1376 mlx5e_open_cq(struct mlx5e_channel *c,
1377     struct mlx5e_cq_param *param,
1378     struct mlx5e_cq *cq,
1379     mlx5e_cq_comp_t *comp,
1380     u8 moderation_mode)
1381 {
1382 	int err;
1383 
1384 	err = mlx5e_create_cq(c, param, cq, comp);
1385 	if (err)
1386 		return (err);
1387 
1388 	err = mlx5e_enable_cq(cq, param, moderation_mode);
1389 	if (err)
1390 		goto err_destroy_cq;
1391 
1392 	return (0);
1393 
1394 err_destroy_cq:
1395 	mlx5e_destroy_cq(cq);
1396 
1397 	return (err);
1398 }
1399 
1400 static void
1401 mlx5e_close_cq(struct mlx5e_cq *cq)
1402 {
1403 	mlx5e_disable_cq(cq);
1404 	mlx5e_destroy_cq(cq);
1405 }
1406 
1407 static int
1408 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1409     struct mlx5e_channel_param *cparam)
1410 {
1411 	u8 tx_moderation_mode;
1412 	int err;
1413 	int tc;
1414 
1415 	switch (c->priv->params.tx_cq_moderation_mode) {
1416 	case 0:
1417 		tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1418 		break;
1419 	default:
1420 		if (MLX5_CAP_GEN(c->priv->mdev, cq_period_start_from_cqe))
1421 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1422 		else
1423 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1424 		break;
1425 	}
1426 	for (tc = 0; tc < c->num_tc; tc++) {
1427 		/* open completion queue */
1428 		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
1429 		    &mlx5e_tx_cq_comp, tx_moderation_mode);
1430 		if (err)
1431 			goto err_close_tx_cqs;
1432 	}
1433 	return (0);
1434 
1435 err_close_tx_cqs:
1436 	for (tc--; tc >= 0; tc--)
1437 		mlx5e_close_cq(&c->sq[tc].cq);
1438 
1439 	return (err);
1440 }
1441 
1442 static void
1443 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1444 {
1445 	int tc;
1446 
1447 	for (tc = 0; tc < c->num_tc; tc++)
1448 		mlx5e_close_cq(&c->sq[tc].cq);
1449 }
1450 
1451 static int
1452 mlx5e_open_sqs(struct mlx5e_channel *c,
1453     struct mlx5e_channel_param *cparam)
1454 {
1455 	int err;
1456 	int tc;
1457 
1458 	for (tc = 0; tc < c->num_tc; tc++) {
1459 		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1460 		if (err)
1461 			goto err_close_sqs;
1462 	}
1463 
1464 	return (0);
1465 
1466 err_close_sqs:
1467 	for (tc--; tc >= 0; tc--)
1468 		mlx5e_close_sq_wait(&c->sq[tc]);
1469 
1470 	return (err);
1471 }
1472 
1473 static void
1474 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1475 {
1476 	int tc;
1477 
1478 	for (tc = 0; tc < c->num_tc; tc++)
1479 		mlx5e_close_sq_wait(&c->sq[tc]);
1480 }
1481 
1482 static void
1483 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1484 {
1485 	int tc;
1486 
1487 	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1488 
1489 	for (tc = 0; tc < c->num_tc; tc++) {
1490 		struct mlx5e_sq *sq = c->sq + tc;
1491 
1492 		mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
1493 		mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
1494 		    MTX_DEF);
1495 
1496 		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1497 
1498 		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1499 
1500 		/* ensure the TX completion event factor is not zero */
1501 		if (sq->cev_factor == 0)
1502 			sq->cev_factor = 1;
1503 	}
1504 }
1505 
1506 static void
1507 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1508 {
1509 	int tc;
1510 
1511 	mtx_destroy(&c->rq.mtx);
1512 
1513 	for (tc = 0; tc < c->num_tc; tc++) {
1514 		mtx_destroy(&c->sq[tc].lock);
1515 		mtx_destroy(&c->sq[tc].comp_lock);
1516 	}
1517 }
1518 
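/*
 * Assemble one channel in dependency order: mutexes first, then the
 * transmit completion queues, the receive completion queue, the send
 * queues and finally the receive queue. The error unwind below
 * releases the resources in reverse order.
 */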
1519 static int
1520 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1521     struct mlx5e_channel_param *cparam,
1522     struct mlx5e_channel *volatile *cp)
1523 {
1524 	struct mlx5e_channel *c;
1525 	u8 rx_moderation_mode;
1526 	int err;
1527 
1528 	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1529 	if (c == NULL)
1530 		return (-ENOMEM);
1531 
1532 	c->priv = priv;
1533 	c->ix = ix;
1534 	c->cpu = 0;
1535 	c->pdev = &priv->mdev->pdev->dev;
1536 	c->ifp = priv->ifp;
1537 	c->mkey_be = cpu_to_be32(priv->mr.key);
1538 	c->num_tc = priv->num_tc;
1539 
1540 	/* init mutexes */
1541 	mlx5e_chan_mtx_init(c);
1542 
1543 	/* open transmit completion queue */
1544 	err = mlx5e_open_tx_cqs(c, cparam);
1545 	if (err)
1546 		goto err_free;
1547 
1548 	switch (priv->params.rx_cq_moderation_mode) {
1549 	case 0:
1550 		rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1551 		break;
1552 	default:
1553 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1554 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1555 		else
1556 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1557 		break;
1558 	}
1559 
1560 	/* open receive completion queue */
1561 	err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
1562 	    &mlx5e_rx_cq_comp, rx_moderation_mode);
1563 	if (err)
1564 		goto err_close_tx_cqs;
1565 
1566 	err = mlx5e_open_sqs(c, cparam);
1567 	if (err)
1568 		goto err_close_rx_cq;
1569 
1570 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1571 	if (err)
1572 		goto err_close_sqs;
1573 
1574 	/* store channel pointer */
1575 	*cp = c;
1576 
1577 	/* poll receive queue initially */
1578 	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1579 
1580 	return (0);
1581 
1582 err_close_sqs:
1583 	mlx5e_close_sqs_wait(c);
1584 
1585 err_close_rx_cq:
1586 	mlx5e_close_cq(&c->rq.cq);
1587 
1588 err_close_tx_cqs:
1589 	mlx5e_close_tx_cqs(c);
1590 
1591 err_free:
1592 	/* destroy mutexes */
1593 	mlx5e_chan_mtx_destroy(c);
1594 	free(c, M_MLX5EN);
1595 	return (err);
1596 }
1597 
1598 static void
1599 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1600 {
1601 	struct mlx5e_channel *c = *pp;
1602 
1603 	/* check if channel is already closed */
1604 	if (c == NULL)
1605 		return;
1606 	mlx5e_close_rq(&c->rq);
1607 }
1608 
1609 static void
1610 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1611 {
1612 	struct mlx5e_channel *c = *pp;
1613 
1614 	/* check if channel is already closed */
1615 	if (c == NULL)
1616 		return;
1617 	/* ensure channel pointer is no longer used */
1618 	*pp = NULL;
1619 
1620 	mlx5e_close_rq_wait(&c->rq);
1621 	mlx5e_close_sqs_wait(c);
1622 	mlx5e_close_cq(&c->rq.cq);
1623 	mlx5e_close_tx_cqs(c);
1624 	/* destroy mutexes */
1625 	mlx5e_chan_mtx_destroy(c);
1626 	free(c, M_MLX5EN);
1627 }
1628 
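/*
 * Fill in the receive queue context defaults: a linked-list work queue
 * with aligned end padding, the log2 queue and stride sizes, and the
 * protection domain number.
 */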
1629 static void
1630 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1631     struct mlx5e_rq_param *param)
1632 {
1633 	void *rqc = param->rqc;
1634 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1635 
1636 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1637 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1638 	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
1639 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1640 	MLX5_SET(wq, wq, pd, priv->pdn);
1641 
1642 	param->wq.buf_numa_node = 0;
1643 	param->wq.db_numa_node = 0;
1644 	param->wq.linear = 1;
1645 }
1646 
1647 static void
1648 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1649     struct mlx5e_sq_param *param)
1650 {
1651 	void *sqc = param->sqc;
1652 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1653 
1654 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1655 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1656 	MLX5_SET(wq, wq, pd, priv->pdn);
1657 
1658 	param->wq.buf_numa_node = 0;
1659 	param->wq.db_numa_node = 0;
1660 	param->wq.linear = 1;
1661 }
1662 
1663 static void
1664 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1665     struct mlx5e_cq_param *param)
1666 {
1667 	void *cqc = param->cqc;
1668 
1669 	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1670 }
1671 
1672 static void
1673 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1674     struct mlx5e_cq_param *param)
1675 {
1676 	void *cqc = param->cqc;
1677 
1678 	/*
1679 	 * TODO: The sysctl controlling CQE zipping is a boolean for now,
1680 	 * which means we only support the CSUM mini-CQE format; once HASH
1681 	 * is implemented we'll need to address that.
1682 	 */
1683 	if (priv->params.cqe_zipping_en) {
1684 		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1685 		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1686 	}
1687 
1688 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1689 	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1690 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1691 
1692 	mlx5e_build_common_cq_param(priv, param);
1693 }
1694 
1695 static void
1696 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1697     struct mlx5e_cq_param *param)
1698 {
1699 	void *cqc = param->cqc;
1700 
1701 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1702 	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1703 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1704 
1705 	mlx5e_build_common_cq_param(priv, param);
1706 }
1707 
1708 static void
1709 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1710     struct mlx5e_channel_param *cparam)
1711 {
1712 	memset(cparam, 0, sizeof(*cparam));
1713 
1714 	mlx5e_build_rq_param(priv, &cparam->rq);
1715 	mlx5e_build_sq_param(priv, &cparam->sq);
1716 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1717 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1718 }
1719 
1720 static int
1721 mlx5e_open_channels(struct mlx5e_priv *priv)
1722 {
1723 	struct mlx5e_channel_param cparam;
1724 	void *ptr;
1725 	int err;
1726 	int i;
1727 	int j;
1728 
1729 	priv->channel = malloc(priv->params.num_channels *
1730 	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1731 	if (priv->channel == NULL)
1732 		return (-ENOMEM);
1733 
1734 	mlx5e_build_channel_param(priv, &cparam);
1735 	for (i = 0; i < priv->params.num_channels; i++) {
1736 		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1737 		if (err)
1738 			goto err_close_channels;
1739 	}
1740 
1741 	for (j = 0; j < priv->params.num_channels; j++) {
1742 		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1743 		if (err)
1744 			goto err_close_channels;
1745 	}
1746 
1747 	return (0);
1748 
1749 err_close_channels:
1750 	for (i--; i >= 0; i--) {
1751 		mlx5e_close_channel(&priv->channel[i]);
1752 		mlx5e_close_channel_wait(&priv->channel[i]);
1753 	}
1754 
1755 	/* remove "volatile" attribute from "channel" pointer */
1756 	ptr = __DECONST(void *, priv->channel);
1757 	priv->channel = NULL;
1758 
1759 	free(ptr, M_MLX5EN);
1760 
1761 	return (err);
1762 }
1763 
1764 static void
1765 mlx5e_close_channels(struct mlx5e_priv *priv)
1766 {
1767 	void *ptr;
1768 	int i;
1769 
1770 	if (priv->channel == NULL)
1771 		return;
1772 
1773 	for (i = 0; i < priv->params.num_channels; i++)
1774 		mlx5e_close_channel(&priv->channel[i]);
1775 	for (i = 0; i < priv->params.num_channels; i++)
1776 		mlx5e_close_channel_wait(&priv->channel[i]);
1777 
1778 	/* remove "volatile" attribute from "channel" pointer */
1779 	ptr = __DECONST(void *, priv->channel);
1780 	priv->channel = NULL;
1781 
1782 	free(ptr, M_MLX5EN);
1783 }
1784 
1785 static int
1786 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1787 {
1788 	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
1789 	    priv->params.tx_cq_moderation_usec,
1790 	    priv->params.tx_cq_moderation_pkts));
1791 }
1792 
1793 static int
1794 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
1795 {
1796 	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
1797 	    priv->params.rx_cq_moderation_usec,
1798 	    priv->params.rx_cq_moderation_pkts));
1799 }
1800 
1801 static int
1802 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1803 {
1804 	int err;
1805 	int i;
1806 
1807 	if (c == NULL)
1808 		return (EINVAL);
1809 
1810 	err = mlx5e_refresh_rq_params(priv, &c->rq);
1811 	if (err)
1812 		goto done;
1813 
1814 	for (i = 0; i != c->num_tc; i++) {
1815 		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
1816 		if (err)
1817 			goto done;
1818 	}
1819 done:
1820 	return (err);
1821 }
1822 
1823 int
1824 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
1825 {
1826 	int i;
1827 
1828 	if (priv->channel == NULL)
1829 		return (EINVAL);
1830 
1831 	for (i = 0; i < priv->params.num_channels; i++) {
1832 		int err;
1833 
1834 		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
1835 		if (err)
1836 			return (err);
1837 	}
1838 	return (0);
1839 }
1840 
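/*
 * Create one TIS (transport interface send object) per traffic class;
 * it carries the priority and transport domain used by the send queues
 * bound to it.
 */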
1841 static int
1842 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
1843 {
1844 	struct mlx5_core_dev *mdev = priv->mdev;
1845 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
1846 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1847 
1848 	memset(in, 0, sizeof(in));
1849 
1850 	MLX5_SET(tisc, tisc, prio, tc);
1851 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
1852 
1853 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
1854 }
1855 
1856 static void
1857 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
1858 {
1859 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
1860 }
1861 
1862 static int
1863 mlx5e_open_tises(struct mlx5e_priv *priv)
1864 {
1865 	int num_tc = priv->num_tc;
1866 	int err;
1867 	int tc;
1868 
1869 	for (tc = 0; tc < num_tc; tc++) {
1870 		err = mlx5e_open_tis(priv, tc);
1871 		if (err)
1872 			goto err_close_tises;
1873 	}
1874 
1875 	return (0);
1876 
1877 err_close_tises:
1878 	for (tc--; tc >= 0; tc--)
1879 		mlx5e_close_tis(priv, tc);
1880 
1881 	return (err);
1882 }
1883 
1884 static void
1885 mlx5e_close_tises(struct mlx5e_priv *priv)
1886 {
1887 	int num_tc = priv->num_tc;
1888 	int tc;
1889 
1890 	for (tc = 0; tc < num_tc; tc++)
1891 		mlx5e_close_tis(priv, tc);
1892 }
1893 
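/*
 * Create the receive queue table (RQT) used for RSS indirection. Each
 * of the 2^rx_hash_log_tbl_sz slots points at one channel's RQ; with
 * the kernel RSS option the mapping follows the system indirection
 * table, otherwise the channels are assigned round-robin.
 */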
1894 static int
1895 mlx5e_open_rqt(struct mlx5e_priv *priv)
1896 {
1897 	struct mlx5_core_dev *mdev = priv->mdev;
1898 	u32 *in;
1899 	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
1900 	void *rqtc;
1901 	int inlen;
1902 	int err;
1903 	int sz;
1904 	int i;
1905 
1906 	sz = 1 << priv->params.rx_hash_log_tbl_sz;
1907 
1908 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
1909 	in = mlx5_vzalloc(inlen);
1910 	if (in == NULL)
1911 		return (-ENOMEM);
1912 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1913 
1914 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
1915 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
1916 
1917 	for (i = 0; i < sz; i++) {
1918 		int ix;
1919 #ifdef RSS
1920 		ix = rss_get_indirection_to_bucket(i);
1921 #else
1922 		ix = i;
1923 #endif
1924 		/* ensure we don't overflow */
1925 		ix %= priv->params.num_channels;
1926 		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
1927 	}
1928 
1929 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
1930 
1931 	memset(out, 0, sizeof(out));
1932 	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
1933 	if (!err)
1934 		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
1935 
1936 	kvfree(in);
1937 
1938 	return (err);
1939 }
1940 
1941 static void
1942 mlx5e_close_rqt(struct mlx5e_priv *priv)
1943 {
1944 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
1945 	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
1946 
1947 	memset(in, 0, sizeof(in));
1948 
1949 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
1950 	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
1951 
1952 	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
1953 	    sizeof(out));
1954 }
1955 
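/*
 * Prepare one TIR (transport interface receive) context per traffic
 * type. MLX5E_TT_ANY dispatches directly to a single RQ, while all
 * other types dispatch through the RQT using a Toeplitz hash over the
 * fields selected below. Without the kernel RSS option a fixed,
 * symmetric hash key is programmed.
 */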
1956 static void
1957 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
1958 {
1959 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1960 	__be32 *hkey;
1961 
1962 	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
1963 
1964 #define	ROUGH_MAX_L2_L3_HDR_SZ 256
1965 
1966 #define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1967 			  MLX5_HASH_FIELD_SEL_DST_IP)
1968 
1969 #define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1970 			  MLX5_HASH_FIELD_SEL_DST_IP   |\
1971 			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
1972 			  MLX5_HASH_FIELD_SEL_L4_DPORT)
1973 
1974 #define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
1975 				 MLX5_HASH_FIELD_SEL_DST_IP   |\
1976 				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
1977 
1978 	if (priv->params.hw_lro_en) {
1979 		MLX5_SET(tirc, tirc, lro_enable_mask,
1980 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
1981 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
1982 		MLX5_SET(tirc, tirc, lro_max_msg_sz,
1983 		    (priv->params.lro_wqe_sz -
1984 		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
1985 		/* TODO: add the option to choose timer value dynamically */
1986 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
1987 		    MLX5_CAP_ETH(priv->mdev,
1988 		    lro_timer_supported_periods[2]));
1989 	}
1990 
1991 	/* setup parameters for hashing TIR type, if any */
1992 	switch (tt) {
1993 	case MLX5E_TT_ANY:
1994 		MLX5_SET(tirc, tirc, disp_type,
1995 		    MLX5_TIRC_DISP_TYPE_DIRECT);
1996 		MLX5_SET(tirc, tirc, inline_rqn,
1997 		    priv->channel[0]->rq.rqn);
1998 		break;
1999 	default:
2000 		MLX5_SET(tirc, tirc, disp_type,
2001 		    MLX5_TIRC_DISP_TYPE_INDIRECT);
2002 		MLX5_SET(tirc, tirc, indirect_table,
2003 		    priv->rqtn);
2004 		MLX5_SET(tirc, tirc, rx_hash_fn,
2005 		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2006 		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2007 #ifdef RSS
2008 		/*
2009 		 * The FreeBSD RSS implementation does not currently
2010 		 * support symmetric Toeplitz hashes:
2011 		 */
2012 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2013 		rss_getkey((uint8_t *)hkey);
2014 #else
2015 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2016 		hkey[0] = cpu_to_be32(0xD181C62C);
2017 		hkey[1] = cpu_to_be32(0xF7F4DB5B);
2018 		hkey[2] = cpu_to_be32(0x1983A2FC);
2019 		hkey[3] = cpu_to_be32(0x943E1ADB);
2020 		hkey[4] = cpu_to_be32(0xD9389E6B);
2021 		hkey[5] = cpu_to_be32(0xD1039C2C);
2022 		hkey[6] = cpu_to_be32(0xA74499AD);
2023 		hkey[7] = cpu_to_be32(0x593D56D9);
2024 		hkey[8] = cpu_to_be32(0xF3253C06);
2025 		hkey[9] = cpu_to_be32(0x2ADC1FFC);
2026 #endif
2027 		break;
2028 	}
2029 
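	/*
	 * Select the hash input fields. MLX5E_TT_ANY was dispatched
	 * directly to a single RQ above; all other traffic types are
	 * spread over the RQT by the Toeplitz hash of the fields
	 * chosen below.
	 */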
2030 	switch (tt) {
2031 	case MLX5E_TT_IPV4_TCP:
2032 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2033 		    MLX5_L3_PROT_TYPE_IPV4);
2034 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2035 		    MLX5_L4_PROT_TYPE_TCP);
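		/*
		 * If the kernel RSS policy does not hash TCP/IPv4 over
		 * the 4-tuple, fall back to hashing the IP addresses
		 * only; the same fallback is applied to the other
		 * traffic types below.
		 */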
2036 #ifdef RSS
2037 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2038 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2039 			    MLX5_HASH_IP);
2040 		} else
2041 #endif
2042 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2043 		    MLX5_HASH_ALL);
2044 		break;
2045 
2046 	case MLX5E_TT_IPV6_TCP:
2047 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2048 		    MLX5_L3_PROT_TYPE_IPV6);
2049 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2050 		    MLX5_L4_PROT_TYPE_TCP);
2051 #ifdef RSS
2052 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2053 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2054 			    MLX5_HASH_IP);
2055 		} else
2056 #endif
2057 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2058 		    MLX5_HASH_ALL);
2059 		break;
2060 
2061 	case MLX5E_TT_IPV4_UDP:
2062 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2063 		    MLX5_L3_PROT_TYPE_IPV4);
2064 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2065 		    MLX5_L4_PROT_TYPE_UDP);
2066 #ifdef RSS
2067 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2068 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2069 			    MLX5_HASH_IP);
2070 		} else
2071 #endif
2072 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2073 		    MLX5_HASH_ALL);
2074 		break;
2075 
2076 	case MLX5E_TT_IPV6_UDP:
2077 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2078 		    MLX5_L3_PROT_TYPE_IPV6);
2079 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2080 		    MLX5_L4_PROT_TYPE_UDP);
2081 #ifdef RSS
2082 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2083 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2084 			    MLX5_HASH_IP);
2085 		} else
2086 #endif
2087 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2088 		    MLX5_HASH_ALL);
2089 		break;
2090 
2091 	case MLX5E_TT_IPV4_IPSEC_AH:
2092 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2093 		    MLX5_L3_PROT_TYPE_IPV4);
2094 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2095 		    MLX5_HASH_IP_IPSEC_SPI);
2096 		break;
2097 
2098 	case MLX5E_TT_IPV6_IPSEC_AH:
2099 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2100 		    MLX5_L3_PROT_TYPE_IPV6);
2101 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2102 		    MLX5_HASH_IP_IPSEC_SPI);
2103 		break;
2104 
2105 	case MLX5E_TT_IPV4_IPSEC_ESP:
2106 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2107 		    MLX5_L3_PROT_TYPE_IPV4);
2108 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2109 		    MLX5_HASH_IP_IPSEC_SPI);
2110 		break;
2111 
2112 	case MLX5E_TT_IPV6_IPSEC_ESP:
2113 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2114 		    MLX5_L3_PROT_TYPE_IPV6);
2115 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2116 		    MLX5_HASH_IP_IPSEC_SPI);
2117 		break;
2118 
2119 	case MLX5E_TT_IPV4:
2120 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2121 		    MLX5_L3_PROT_TYPE_IPV4);
2122 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2123 		    MLX5_HASH_IP);
2124 		break;
2125 
2126 	case MLX5E_TT_IPV6:
2127 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2128 		    MLX5_L3_PROT_TYPE_IPV6);
2129 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2130 		    MLX5_HASH_IP);
2131 		break;
2132 
2133 	default:
2134 		break;
2135 	}
2136 }
2137 
2138 static int
2139 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2140 {
2141 	struct mlx5_core_dev *mdev = priv->mdev;
2142 	u32 *in;
2143 	void *tirc;
2144 	int inlen;
2145 	int err;
2146 
2147 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2148 	in = mlx5_vzalloc(inlen);
2149 	if (in == NULL)
2150 		return (-ENOMEM);
2151 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2152 
2153 	mlx5e_build_tir_ctx(priv, tirc, tt);
2154 
2155 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2156 
2157 	kvfree(in);
2158 
2159 	return (err);
2160 }
2161 
2162 static void
2163 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2164 {
2165 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2166 }
2167 
2168 static int
2169 mlx5e_open_tirs(struct mlx5e_priv *priv)
2170 {
2171 	int err;
2172 	int i;
2173 
2174 	for (i = 0; i < MLX5E_NUM_TT; i++) {
2175 		err = mlx5e_open_tir(priv, i);
2176 		if (err)
2177 			goto err_close_tirs;
2178 	}
2179 
2180 	return (0);
2181 
2182 err_close_tirs:
2183 	for (i--; i >= 0; i--)
2184 		mlx5e_close_tir(priv, i);
2185 
2186 	return (err);
2187 }
2188 
2189 static void
2190 mlx5e_close_tirs(struct mlx5e_priv *priv)
2191 {
2192 	int i;
2193 
2194 	for (i = 0; i < MLX5E_NUM_TT; i++)
2195 		mlx5e_close_tir(priv, i);
2196 }
2197 
2198 /*
2199  * SW MTU does not include headers,
2200  * HW MTU includes all headers and checksums.
2201  */
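/*
 * A sketch of the conversion, assuming the usual MLX5E_SW2HW_MTU()
 * definition of SW MTU plus Ethernet header, VLAN tag and FCS:
 * a SW MTU of 1500 becomes a HW MTU of 1500 + 14 + 4 + 4 = 1522.
 */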
2202 static int
2203 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2204 {
2205 	struct mlx5e_priv *priv = ifp->if_softc;
2206 	struct mlx5_core_dev *mdev = priv->mdev;
2207 	int hw_mtu;
2208 	int err;
2209 
2211 	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
2212 	if (err) {
2213 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2214 		    __func__, sw_mtu, err);
2215 		return (err);
2216 	}
2217 	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2218 	if (!err) {
2219 		ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);
2220 
2221 		if (ifp->if_mtu != sw_mtu) {
2222 			if_printf(ifp, "Requested MTU %d differs from "
2223 			    "effective port MTU %d\n", sw_mtu, (int)ifp->if_mtu);
2224 		}
2225 	} else {
2226 		if_printf(ifp, "Querying the port MTU after setting the "
2227 		    "new MTU value failed\n");
2228 		ifp->if_mtu = sw_mtu;
2229 	}
2230 	return (0);
2231 }
2232 
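/*
 * Bring-up order: TISes, a vport queue counter, the channels, the
 * RQT, the TIRs, the flow table, and finally the VLAN rules; any
 * failure unwinds the already-created objects in reverse order.
 */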
2233 int
2234 mlx5e_open_locked(struct ifnet *ifp)
2235 {
2236 	struct mlx5e_priv *priv = ifp->if_softc;
2237 	int err;
2238 
2239 	/* check if already opened */
2240 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2241 		return (0);
2242 
2243 #ifdef RSS
2244 	if (rss_getnumbuckets() > priv->params.num_channels) {
2245 		if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
2246 		    "channels (%u) available\n", rss_getnumbuckets(),
2247 		    priv->params.num_channels);
2248 	}
2249 #endif
2250 	err = mlx5e_open_tises(priv);
2251 	if (err) {
2252 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2253 		    __func__, err);
2254 		return (err);
2255 	}
2256 	err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
2257 	if (err) {
2258 		if_printf(priv->ifp,
2259 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2260 		    __func__, err);
2261 		goto err_close_tises;
2262 	}
2263 	err = mlx5e_open_channels(priv);
2264 	if (err) {
2265 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2266 		    __func__, err);
2267 		goto err_dealloc_q_counter;
2268 	}
2269 	err = mlx5e_open_rqt(priv);
2270 	if (err) {
2271 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2272 		    __func__, err);
2273 		goto err_close_channels;
2274 	}
2275 	err = mlx5e_open_tirs(priv);
2276 	if (err) {
2277 		if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2278 		    __func__, err);
2279 		goto err_close_rqt;
2280 	}
2281 	err = mlx5e_open_flow_table(priv);
2282 	if (err) {
2283 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2284 		    __func__, err);
2285 		goto err_close_tirs;
2286 	}
2287 	err = mlx5e_add_all_vlan_rules(priv);
2288 	if (err) {
2289 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2290 		    __func__, err);
2291 		goto err_close_flow_table;
2292 	}
2293 	set_bit(MLX5E_STATE_OPENED, &priv->state);
2294 
2295 	mlx5e_update_carrier(priv);
2296 	mlx5e_set_rx_mode_core(priv);
2297 
2298 	return (0);
2299 
2300 err_close_flow_table:
2301 	mlx5e_close_flow_table(priv);
2302 
2303 err_close_tirs:
2304 	mlx5e_close_tirs(priv);
2305 
2306 err_close_rqt:
2307 	mlx5e_close_rqt(priv);
2308 
2309 err_close_channels:
2310 	mlx5e_close_channels(priv);
2311 
2312 err_dealloc_q_counter:
2313 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2314 
2315 err_close_tises:
2316 	mlx5e_close_tises(priv);
2317 
2318 	return (err);
2319 }
2320 
2321 static void
2322 mlx5e_open(void *arg)
2323 {
2324 	struct mlx5e_priv *priv = arg;
2325 
2326 	PRIV_LOCK(priv);
2327 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2328 		if_printf(priv->ifp,
2329 		    "%s: Setting port status to up failed\n",
2330 		    __func__);
2331 
2332 	mlx5e_open_locked(priv->ifp);
2333 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2334 	PRIV_UNLOCK(priv);
2335 }
2336 
2337 int
2338 mlx5e_close_locked(struct ifnet *ifp)
2339 {
2340 	struct mlx5e_priv *priv = ifp->if_softc;
2341 
2342 	/* check if already closed */
2343 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2344 		return (0);
2345 
2346 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2347 
2348 	mlx5e_set_rx_mode_core(priv);
2349 	mlx5e_del_all_vlan_rules(priv);
2350 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2351 	mlx5e_close_flow_table(priv);
2352 	mlx5e_close_tirs(priv);
2353 	mlx5e_close_rqt(priv);
2354 	mlx5e_close_channels(priv);
2355 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2356 	mlx5e_close_tises(priv);
2357 
2358 	return (0);
2359 }
2360 
2361 #if (__FreeBSD_version >= 1100000)
2362 static uint64_t
2363 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2364 {
2365 	struct mlx5e_priv *priv = ifp->if_softc;
2366 	u64 retval;
2367 
2368 	/* PRIV_LOCK(priv); XXX not allowed */
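	/*
	 * The vport counters read below are refreshed asynchronously
	 * by the update_stats work; unlocked reads may therefore
	 * return slightly stale values, which is tolerated here.
	 */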
2369 	switch (cnt) {
2370 	case IFCOUNTER_IPACKETS:
2371 		retval = priv->stats.vport.rx_packets;
2372 		break;
2373 	case IFCOUNTER_IERRORS:
2374 		retval = priv->stats.vport.rx_error_packets;
2375 		break;
2376 	case IFCOUNTER_IQDROPS:
2377 		retval = priv->stats.vport.rx_out_of_buffer;
2378 		break;
2379 	case IFCOUNTER_OPACKETS:
2380 		retval = priv->stats.vport.tx_packets;
2381 		break;
2382 	case IFCOUNTER_OERRORS:
2383 		retval = priv->stats.vport.tx_error_packets;
2384 		break;
2385 	case IFCOUNTER_IBYTES:
2386 		retval = priv->stats.vport.rx_bytes;
2387 		break;
2388 	case IFCOUNTER_OBYTES:
2389 		retval = priv->stats.vport.tx_bytes;
2390 		break;
2391 	case IFCOUNTER_IMCASTS:
2392 		retval = priv->stats.vport.rx_multicast_packets;
2393 		break;
2394 	case IFCOUNTER_OMCASTS:
2395 		retval = priv->stats.vport.tx_multicast_packets;
2396 		break;
2397 	case IFCOUNTER_OQDROPS:
2398 		retval = priv->stats.vport.tx_queue_dropped;
2399 		break;
2400 	default:
2401 		retval = if_get_counter_default(ifp, cnt);
2402 		break;
2403 	}
2404 	/* PRIV_UNLOCK(priv); XXX not allowed */
2405 	return (retval);
2406 }
2407 #endif
2408 
2409 static void
2410 mlx5e_set_rx_mode(struct ifnet *ifp)
2411 {
2412 	struct mlx5e_priv *priv = ifp->if_softc;
2413 
2414 	schedule_work(&priv->set_rx_mode_work);
2415 }
2416 
2417 static int
2418 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2419 {
2420 	struct mlx5e_priv *priv;
2421 	struct ifreq *ifr;
2422 	struct ifi2creq i2c;
2423 	int error = 0;
2424 	int mask = 0;
2425 	int size_read = 0;
2426 	int module_num;
2427 	int max_mtu;
2428 	uint8_t read_addr;
2429 
2430 	priv = ifp->if_softc;
2431 
2432 	/* check if detaching */
2433 	if (priv == NULL || priv->gone != 0)
2434 		return (ENXIO);
2435 
2436 	switch (command) {
2437 	case SIOCSIFMTU:
2438 		ifr = (struct ifreq *)data;
2439 
2440 		PRIV_LOCK(priv);
2441 		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2442 
2443 		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2444 		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2445 			int was_opened;
2446 
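			/*
			 * The queue configuration depends on the MTU,
			 * so a running interface is torn down and
			 * rebuilt around the MTU change.
			 */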
2447 			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2448 			if (was_opened)
2449 				mlx5e_close_locked(ifp);
2450 
2451 			/* set new MTU */
2452 			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2453 
2454 			if (was_opened)
2455 				mlx5e_open_locked(ifp);
2456 		} else {
2457 			error = EINVAL;
2458 			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2459 			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2460 		}
2461 		PRIV_UNLOCK(priv);
2462 		break;
2463 	case SIOCSIFFLAGS:
2464 		if ((ifp->if_flags & IFF_UP) &&
2465 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2466 			mlx5e_set_rx_mode(ifp);
2467 			break;
2468 		}
2469 		PRIV_LOCK(priv);
2470 		if (ifp->if_flags & IFF_UP) {
2471 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2472 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2473 					mlx5e_open_locked(ifp);
2474 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2475 				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2476 			}
2477 		} else {
2478 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2479 				mlx5_set_port_status(priv->mdev,
2480 				    MLX5_PORT_DOWN);
2481 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2482 					mlx5e_close_locked(ifp);
2483 				mlx5e_update_carrier(priv);
2484 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2485 			}
2486 		}
2487 		PRIV_UNLOCK(priv);
2488 		break;
2489 	case SIOCADDMULTI:
2490 	case SIOCDELMULTI:
2491 		mlx5e_set_rx_mode(ifp);
2492 		break;
2493 	case SIOCSIFMEDIA:
2494 	case SIOCGIFMEDIA:
2495 	case SIOCGIFXMEDIA:
2496 		ifr = (struct ifreq *)data;
2497 		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2498 		break;
2499 	case SIOCSIFCAP:
2500 		ifr = (struct ifreq *)data;
2501 		PRIV_LOCK(priv);
2502 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2503 
2504 		if (mask & IFCAP_TXCSUM) {
2505 			ifp->if_capenable ^= IFCAP_TXCSUM;
2506 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2507 
2508 			if (IFCAP_TSO4 & ifp->if_capenable &&
2509 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2510 				ifp->if_capenable &= ~IFCAP_TSO4;
2511 				ifp->if_hwassist &= ~CSUM_IP_TSO;
2512 				if_printf(ifp,
2513 				    "tso4 disabled due to -txcsum.\n");
2514 			}
2515 		}
2516 		if (mask & IFCAP_TXCSUM_IPV6) {
2517 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2518 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2519 
2520 			if (IFCAP_TSO6 & ifp->if_capenable &&
2521 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2522 				ifp->if_capenable &= ~IFCAP_TSO6;
2523 				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2524 				if_printf(ifp,
2525 				    "tso6 disabled due to -txcsum6.\n");
2526 			}
2527 		}
2528 		if (mask & IFCAP_RXCSUM)
2529 			ifp->if_capenable ^= IFCAP_RXCSUM;
2530 		if (mask & IFCAP_RXCSUM_IPV6)
2531 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2532 		if (mask & IFCAP_TSO4) {
2533 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2534 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2535 				if_printf(ifp, "enable txcsum first.\n");
2536 				error = EAGAIN;
2537 				goto out;
2538 			}
2539 			ifp->if_capenable ^= IFCAP_TSO4;
2540 			ifp->if_hwassist ^= CSUM_IP_TSO;
2541 		}
2542 		if (mask & IFCAP_TSO6) {
2543 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2544 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2545 				if_printf(ifp, "enable txcsum6 first.\n");
2546 				error = EAGAIN;
2547 				goto out;
2548 			}
2549 			ifp->if_capenable ^= IFCAP_TSO6;
2550 			ifp->if_hwassist ^= CSUM_IP6_TSO;
2551 		}
2552 		if (mask & IFCAP_VLAN_HWFILTER) {
2553 			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2554 				mlx5e_disable_vlan_filter(priv);
2555 			else
2556 				mlx5e_enable_vlan_filter(priv);
2557 
2558 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2559 		}
2560 		if (mask & IFCAP_VLAN_HWTAGGING)
2561 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2562 		if (mask & IFCAP_WOL_MAGIC)
2563 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2564 
2565 		VLAN_CAPABILITIES(ifp);
2566 		/* turning off LRO also turns off HW LRO, if it is enabled */
2567 		if (mask & IFCAP_LRO) {
2568 			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2569 			bool need_restart = false;
2570 
2571 			ifp->if_capenable ^= IFCAP_LRO;
2572 			if (!(ifp->if_capenable & IFCAP_LRO)) {
2573 				if (priv->params.hw_lro_en) {
2574 					priv->params.hw_lro_en = false;
2575 					need_restart = true;
2576 					/* Not sure this is the correct way */
2577 					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2578 				}
2579 			}
2580 			if (was_opened && need_restart) {
2581 				mlx5e_close_locked(ifp);
2582 				mlx5e_open_locked(ifp);
2583 			}
2584 		}
2585 out:
2586 		PRIV_UNLOCK(priv);
2587 		break;
2588 
2589 	case SIOCGI2C:
2590 		ifr = (struct ifreq *)data;
2591 
2592 		/*
2593 		 * Copy from the user-space address ifr_data to the
2594 		 * kernel-space address i2c
2595 		 */
2596 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2597 		if (error)
2598 			break;
2599 
2600 		if (i2c.len > sizeof(i2c.data)) {
2601 			error = EINVAL;
2602 			break;
2603 		}
2604 
2605 		PRIV_LOCK(priv);
2606 		/* Get module_num which is required for the query_eeprom */
2607 		error = mlx5_query_module_num(priv->mdev, &module_num);
2608 		if (error) {
2609 			if_printf(ifp, "Query module num failed, eeprom "
2610 			    "reading is not supported\n");
2611 			error = EINVAL;
2612 			goto err_i2c;
2613 		}
2614 		/* Check if module is present before doing an access */
2615 		if (mlx5_query_module_status(priv->mdev, module_num) !=
2616 		    MLX5_MODULE_STATUS_PLUGGED) {
2617 			error = EINVAL;
2618 			goto err_i2c;
2619 		}
2620 		/*
2621 		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
2622 		 * The internal conversion is as follows:
2623 		 */
2624 		if (i2c.dev_addr == 0xA0)
2625 			read_addr = MLX5E_I2C_ADDR_LOW;
2626 		else if (i2c.dev_addr == 0xA2)
2627 			read_addr = MLX5E_I2C_ADDR_HIGH;
2628 		else {
2629 			if_printf(ifp, "Query eeprom failed, "
2630 			    "Invalid Address: %X\n", i2c.dev_addr);
2631 			error = EINVAL;
2632 			goto err_i2c;
2633 		}
2634 		error = mlx5_query_eeprom(priv->mdev,
2635 		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2636 		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2637 		    (uint32_t *)i2c.data, &size_read);
2638 		if (error) {
2639 			if_printf(ifp, "Query eeprom failed, eeprom "
2640 			    "reading is not supported\n");
2641 			error = EINVAL;
2642 			goto err_i2c;
2643 		}
2644 
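		/*
		 * A single query can return at most
		 * MLX5_EEPROM_MAX_BYTES; fetch any remainder of a
		 * larger request with a second query starting at
		 * size_read.
		 */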
2645 		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2646 			error = mlx5_query_eeprom(priv->mdev,
2647 			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2648 			    (uint32_t)(i2c.offset + size_read),
2649 			    (uint32_t)(i2c.len - size_read), module_num,
2650 			    (uint32_t *)(i2c.data + size_read), &size_read);
2651 		}
2652 		if (error) {
2653 			if_printf(ifp, "Query eeprom failed, eeprom "
2654 			    "reading is not supported\n");
2655 			error = EINVAL;
2656 			goto err_i2c;
2657 		}
2658 
2659 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2660 err_i2c:
2661 		PRIV_UNLOCK(priv);
2662 		break;
2663 
2664 	default:
2665 		error = ether_ioctl(ifp, command, data);
2666 		break;
2667 	}
2668 	return (error);
2669 }
2670 
2671 static int
2672 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2673 {
2674 	/*
2675 	 * TODO: uncomment once FW really sets all these bits:
2676 	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
2677 	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
2678 	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
2679 	 *	return (-ENOTSUPP);
2680 	 */
2681 
2682 	/* TODO: add more must-have features */
2683 
2684 	return (0);
2685 }
2686 
2687 static void
2688 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2689     struct mlx5e_priv *priv,
2690     int num_comp_vectors)
2691 {
2692 	/*
2693 	 * TODO: Consider link speed for setting "log_sq_size",
2694 	 * "log_rq_size" and "cq_moderation_xxx":
2695 	 */
2696 	priv->params.log_sq_size =
2697 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2698 	priv->params.log_rq_size =
2699 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2700 	priv->params.rx_cq_moderation_usec =
2701 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2702 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2703 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2704 	priv->params.rx_cq_moderation_mode =
2705 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2706 	priv->params.rx_cq_moderation_pkts =
2707 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2708 	priv->params.tx_cq_moderation_usec =
2709 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2710 	priv->params.tx_cq_moderation_pkts =
2711 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2712 	priv->params.min_rx_wqes =
2713 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
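	/*
	 * Size the RX hash table to at least one entry per completion
	 * vector: take the larger of log2(num_comp_vectors) and the
	 * default log table size.
	 */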
2714 	priv->params.rx_hash_log_tbl_sz =
2715 	    (order_base_2(num_comp_vectors) >
2716 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2717 	    order_base_2(num_comp_vectors) :
2718 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2719 	priv->params.num_tc = 1;
2720 	priv->params.default_vlan_prio = 0;
2721 	priv->counter_set_id = -1;
2722 
2723 	/*
2724 	 * HW LRO currently defaults to off. Once that changes, the HW
2725 	 * capability "!!MLX5_CAP_ETH(mdev, lro_cap)" should be used.
2726 	 */
2727 	priv->params.hw_lro_en = false;
2728 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2729 
2730 	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
2731 
2732 	priv->mdev = mdev;
2733 	priv->params.num_channels = num_comp_vectors;
2734 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
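	/*
	 * A power-of-two mask, presumably for cheap flow-ID based
	 * channel selection in the transmit path (e.g. 8 channels
	 * yield a mask of 0x7).
	 */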
2735 	priv->queue_mapping_channel_mask =
2736 	    roundup_pow_of_two(num_comp_vectors) - 1;
2737 	priv->num_tc = priv->params.num_tc;
2738 	priv->default_vlan_prio = priv->params.default_vlan_prio;
2739 
2740 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2741 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2742 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2743 }
2744 
2745 static int
2746 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2747     struct mlx5_core_mr *mr)
2748 {
2749 	struct ifnet *ifp = priv->ifp;
2750 	struct mlx5_core_dev *mdev = priv->mdev;
2751 	struct mlx5_create_mkey_mbox_in *in;
2752 	int err;
2753 
2754 	in = mlx5_vzalloc(sizeof(*in));
2755 	if (in == NULL) {
2756 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2757 		return (-ENOMEM);
2758 	}
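	/*
	 * Request a physical-address mkey spanning the full 64-bit
	 * address space (MLX5_MKEY_LEN64), so buffers can be posted
	 * by bus address without per-buffer memory registration.
	 */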
2759 	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2760 	    MLX5_PERM_LOCAL_READ |
2761 	    MLX5_ACCESS_MODE_PA;
2762 	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2763 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2764 
2765 	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2766 	    NULL);
2767 	if (err)
2768 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2769 		    __func__, err);
2770 
2771 	kvfree(in);
2772 
2773 	return (err);
2774 }
2775 
2776 static const char *mlx5e_vport_stats_desc[] = {
2777 	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
2778 };
2779 
2780 static const char *mlx5e_pport_stats_desc[] = {
2781 	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
2782 };
2783 
2784 static void
2785 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
2786 {
2787 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
2788 	sx_init(&priv->state_lock, "mlx5state");
2789 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
2790 }
2791 
2792 static void
2793 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
2794 {
2795 	mtx_destroy(&priv->async_events_mtx);
2796 	sx_destroy(&priv->state_lock);
2797 }
2798 
2799 static int
2800 sysctl_firmware(SYSCTL_HANDLER_ARGS)
2801 {
2802 	/*
2803 	 * The string format is "%d.%d.%d".
2804 	 * Each of fw_rev_{maj,min,sub} returns a u16 and 2^16 = 65536,
2805 	 * so a field needs at most 5 chars.
2806 	 * Adding two "." separators and the terminating NUL means we
2807 	 * need at most 18 (5*3 + 3) chars.
2808 	 */
2809 	char fw[18];
2810 	struct mlx5e_priv *priv = arg1;
2811 	int error;
2812 
2813 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
2814 	    fw_rev_sub(priv->mdev));
2815 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
2816 	return (error);
2817 }
2818 
2819 static void
2820 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
2821 {
2822 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2823 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
2824 	    sysctl_firmware, "A", "HCA firmware version");
2825 
2826 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2827 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
2828 	    "Board ID");
2829 }
2830 
2831 static void
2832 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
2833 {
2834 #if (__FreeBSD_version < 1100000)
2835 	char path[64];
2836 
2837 #endif
2838 	/* Only receiving pauseframes is enabled by default */
2839 	priv->params.tx_pauseframe_control = 0;
2840 	priv->params.rx_pauseframe_control = 1;
2841 
2842 #if (__FreeBSD_version < 1100000)
2843 	/* compute path for sysctl */
2844 	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
2845 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
2846 
2847 	/* try to fetch tunable, if any */
2848 	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
2849 
2850 	/* compute path for sysctl */
2851 	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
2852 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
2853 
2854 	/* try to fetch tunable, if any */
2855 	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
2856 #endif
2857 
2858 	/* register pauseframe SYSCTLs */
2859 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2860 	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
2861 	    &priv->params.tx_pauseframe_control, 0,
2862 	    "Set to enable TX pause frames. Clear to disable.");
2863 
2864 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2865 	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
2866 	    &priv->params.rx_pauseframe_control, 0,
2867 	    "Set to enable RX pause frames. Clear to disable.");
2868 
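	/*
	 * Being CTLFLAG_RDTUN, these are preset as loader tunables,
	 * e.g. in /boot/loader.conf (hypothetical unit number 0):
	 *
	 *	dev.mce.0.tx_pauseframe_control="0"
	 */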
2869 	/* range check */
2870 	priv->params.tx_pauseframe_control =
2871 	    priv->params.tx_pauseframe_control ? 1 : 0;
2872 	priv->params.rx_pauseframe_control =
2873 	    priv->params.rx_pauseframe_control ? 1 : 0;
2874 
2875 	/* update firmware */
2876 	mlx5_set_port_pause(priv->mdev, 1,
2877 	    priv->params.rx_pauseframe_control,
2878 	    priv->params.tx_pauseframe_control);
2879 }
2880 
2881 static void *
2882 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
2883 {
2884 	static volatile int mlx5_en_unit;
2885 	struct ifnet *ifp;
2886 	struct mlx5e_priv *priv;
2887 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
2888 	struct sysctl_oid_list *child;
2889 	int ncv = mdev->priv.eq_table.num_comp_vectors;
2890 	char unit[16];
2891 	int err;
2892 	int i;
2893 	u32 eth_proto_cap;
2894 
2895 	if (mlx5e_check_required_hca_cap(mdev)) {
2896 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
2897 		return (NULL);
2898 	}
2899 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
2900 	if (priv == NULL) {
2901 		mlx5_core_err(mdev, "malloc() failed\n");
2902 		return (NULL);
2903 	}
2904 	mlx5e_priv_mtx_init(priv);
2905 
2906 	ifp = priv->ifp = if_alloc(IFT_ETHER);
2907 	if (ifp == NULL) {
2908 		mlx5_core_err(mdev, "if_alloc() failed\n");
2909 		goto err_free_priv;
2910 	}
2911 	ifp->if_softc = priv;
2912 	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
2913 	ifp->if_mtu = ETHERMTU;
2914 	ifp->if_init = mlx5e_open;
2915 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2916 	ifp->if_ioctl = mlx5e_ioctl;
2917 	ifp->if_transmit = mlx5e_xmit;
2918 	ifp->if_qflush = if_qflush;
2919 #if (__FreeBSD_version >= 1100000)
2920 	ifp->if_get_counter = mlx5e_get_counter;
2921 #endif
2922 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
2923 	/*
2924 	 * Set driver features
2925 	 */
2926 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
2927 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
2928 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
2929 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
2930 	ifp->if_capabilities |= IFCAP_LRO;
2931 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
2932 
2933 	/* set TSO limits so that we don't have to drop TX packets */
2934 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2935 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
2936 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
2937 
2938 	ifp->if_capenable = ifp->if_capabilities;
2939 	ifp->if_hwassist = 0;
2940 	if (ifp->if_capenable & IFCAP_TSO)
2941 		ifp->if_hwassist |= CSUM_TSO;
2942 	if (ifp->if_capenable & IFCAP_TXCSUM)
2943 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2944 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2945 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2946 
2947 	/* ifnet sysctl tree */
2948 	sysctl_ctx_init(&priv->sysctl_ctx);
2949 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
2950 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
2951 	if (priv->sysctl_ifnet == NULL) {
2952 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2953 		goto err_free_sysctl;
2954 	}
2955 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
2956 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2957 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
2958 	if (priv->sysctl_ifnet == NULL) {
2959 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2960 		goto err_free_sysctl;
2961 	}
2962 
2963 	/* HW sysctl tree */
2964 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
2965 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
2966 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
2967 	if (priv->sysctl_hw == NULL) {
2968 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2969 		goto err_free_sysctl;
2970 	}
2971 	mlx5e_build_ifp_priv(mdev, priv, ncv);
2972 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
2973 	if (err) {
2974 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
2975 		    __func__, err);
2976 		goto err_free_sysctl;
2977 	}
2978 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
2979 	if (err) {
2980 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
2981 		    __func__, err);
2982 		goto err_unmap_free_uar;
2983 	}
2984 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
2985 	if (err) {
2986 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
2987 		    __func__, err);
2988 		goto err_dealloc_pd;
2989 	}
2990 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
2991 	if (err) {
2992 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
2993 		    __func__, err);
2994 		goto err_dealloc_transport_domain;
2995 	}
2996 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
2997 
2998 	/* check if we should generate a random MAC address */
2999 	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3000 	    is_zero_ether_addr(dev_addr)) {
3001 		random_ether_addr(dev_addr);
3002 		if_printf(ifp, "Assigned random MAC address\n");
3003 	}
3004 
3005 	/* set default MTU */
3006 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3007 
3008 	/* Set desc */
3009 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
3010 
3011 	/* Set default media status */
3012 	priv->media_status_last = IFM_AVALID;
3013 	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3014 	    IFM_ETH_RXPAUSE | IFM_FDX;
3015 
3016 	/* setup default pauseframes configuration */
3017 	mlx5e_setup_pauseframes(priv);
3018 
3019 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3020 	if (err) {
3021 		eth_proto_cap = 0;
3022 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3023 		    __func__, err);
3024 	}
3025 
3026 	/* Setup supported medias */
3027 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3028 	    mlx5e_media_change, mlx5e_media_status);
3029 
3030 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3031 		if (mlx5e_mode_table[i].baudrate == 0)
3032 			continue;
3033 		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3034 			ifmedia_add(&priv->media,
3035 			    mlx5e_mode_table[i].subtype |
3036 			    IFM_ETHER, 0, NULL);
3037 			ifmedia_add(&priv->media,
3038 			    mlx5e_mode_table[i].subtype |
3039 			    IFM_ETHER | IFM_FDX |
3040 			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3041 		}
3042 	}
3043 
3044 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3045 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3046 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3047 
3048 	/* Set autoselect by default */
3049 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3050 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3051 	ether_ifattach(ifp, dev_addr);
3052 
3053 	/* Register for VLAN events */
3054 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3055 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3056 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3057 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3058 
3059 	/* Link is down by default */
3060 	if_link_state_change(ifp, LINK_STATE_DOWN);
3061 
3062 	mlx5e_enable_async_events(priv);
3063 
3064 	mlx5e_add_hw_stats(priv);
3065 
3066 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3067 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3068 	    priv->stats.vport.arg);
3069 
3070 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3071 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3072 	    priv->stats.pport.arg);
3073 
3074 	mlx5e_create_ethtool(priv);
3075 
3076 	mtx_lock(&priv->async_events_mtx);
3077 	mlx5e_update_stats(priv);
3078 	mtx_unlock(&priv->async_events_mtx);
3079 
3080 	return (priv);
3081 
3082 err_dealloc_transport_domain:
3083 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3084 
3085 err_dealloc_pd:
3086 	mlx5_core_dealloc_pd(mdev, priv->pdn);
3087 
3088 err_unmap_free_uar:
3089 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3090 
3091 err_free_sysctl:
3092 	sysctl_ctx_free(&priv->sysctl_ctx);
3093 
3094 	if_free(ifp);
3095 
3096 err_free_priv:
3097 	mlx5e_priv_mtx_destroy(priv);
3098 	free(priv, M_MLX5EN);
3099 	return (NULL);
3100 }
3101 
3102 static void
3103 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3104 {
3105 	struct mlx5e_priv *priv = vpriv;
3106 	struct ifnet *ifp = priv->ifp;
3107 
3108 	/* don't allow more IOCTLs */
3109 	priv->gone = 1;
3110 
3111 	/*
3112 	 * Clear the device description to avoid use after free,
3113 	 * because the bsddev is not destroyed when this module is
3114 	 * unloaded:
3115 	 */
3116 	device_set_desc(mdev->pdev->dev.bsddev, NULL);
3117 
3118 	/* XXX wait a bit to allow IOCTL handlers to complete */
3119 	pause("W", hz);
3120 
3121 	/* stop watchdog timer */
3122 	callout_drain(&priv->watchdog);
3123 
3124 	if (priv->vlan_attach != NULL)
3125 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3126 	if (priv->vlan_detach != NULL)
3127 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3128 
3129 	/* make sure device gets closed */
3130 	PRIV_LOCK(priv);
3131 	mlx5e_close_locked(ifp);
3132 	PRIV_UNLOCK(priv);
3133 
3134 	/* unregister device */
3135 	ifmedia_removeall(&priv->media);
3136 	ether_ifdetach(ifp);
3137 	if_free(ifp);
3138 
3139 	/* destroy all remaining sysctl nodes */
3140 	if (priv->sysctl_debug)
3141 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3142 	sysctl_ctx_free(&priv->stats.vport.ctx);
3143 	sysctl_ctx_free(&priv->stats.pport.ctx);
3144 	sysctl_ctx_free(&priv->sysctl_ctx);
3145 
3146 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3147 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3148 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3149 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3150 	mlx5e_disable_async_events(priv);
3151 	flush_scheduled_work();
3152 	mlx5e_priv_mtx_destroy(priv);
3153 	free(priv, M_MLX5EN);
3154 }
3155 
3156 static void *
3157 mlx5e_get_ifp(void *vpriv)
3158 {
3159 	struct mlx5e_priv *priv = vpriv;
3160 
3161 	return (priv->ifp);
3162 }
3163 
3164 static struct mlx5_interface mlx5e_interface = {
3165 	.add = mlx5e_create_ifp,
3166 	.remove = mlx5e_destroy_ifp,
3167 	.event = mlx5e_async_event,
3168 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3169 	.get_dev = mlx5e_get_ifp,
3170 };
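/*
 * The Ethernet driver attaches through the mlx5 core interface
 * registration below rather than a newbus probe/attach pair;
 * mlx5e_create_ifp() is called for each mlx5 core device that
 * speaks the Ethernet protocol.
 */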
3171 
3172 void
3173 mlx5e_init(void)
3174 {
3175 	mlx5_register_interface(&mlx5e_interface);
3176 }
3177 
3178 void
3179 mlx5e_cleanup(void)
3180 {
3181 	mlx5_unregister_interface(&mlx5e_interface);
3182 }
3183 
3184 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3185 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3186 
3187 #if (__FreeBSD_version >= 1100000)
3188 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3189 #endif
3190 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3191 MODULE_VERSION(mlx5en, 1);
3192