xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision f061a2215f9bf0bea98ac601a34750f89428db67)
1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "en.h"
29 
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32 
/* Human readable version string of the Ethernet part of the driver. */
#define	ETH_DRIVER_VERSION	"3.1.0-dev"
char mlx5e_version[] =
    "Mellanox Ethernet driver (" ETH_DRIVER_VERSION ")";
36 
37 struct mlx5e_rq_param {
38 	u32	rqc [MLX5_ST_SZ_DW(rqc)];
39 	struct mlx5_wq_param wq;
40 };
41 
42 struct mlx5e_sq_param {
43 	u32	sqc [MLX5_ST_SZ_DW(sqc)];
44 	struct mlx5_wq_param wq;
45 };
46 
47 struct mlx5e_cq_param {
48 	u32	cqc [MLX5_ST_SZ_DW(cqc)];
49 	struct mlx5_wq_param wq;
50 	u16	eq_ix;
51 };
52 
53 struct mlx5e_channel_param {
54 	struct mlx5e_rq_param rq;
55 	struct mlx5e_sq_param sq;
56 	struct mlx5e_cq_param rx_cq;
57 	struct mlx5e_cq_param tx_cq;
58 };
59 
60 static const struct {
61 	u32	subtype;
62 	u64	baudrate;
63 }	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
64 
65 	[MLX5E_1000BASE_CX_SGMII] = {
66 		.subtype = IFM_1000_CX_SGMII,
67 		.baudrate = IF_Mbps(1000ULL),
68 	},
69 	[MLX5E_1000BASE_KX] = {
70 		.subtype = IFM_1000_KX,
71 		.baudrate = IF_Mbps(1000ULL),
72 	},
73 	[MLX5E_10GBASE_CX4] = {
74 		.subtype = IFM_10G_CX4,
75 		.baudrate = IF_Gbps(10ULL),
76 	},
77 	[MLX5E_10GBASE_KX4] = {
78 		.subtype = IFM_10G_KX4,
79 		.baudrate = IF_Gbps(10ULL),
80 	},
81 	[MLX5E_10GBASE_KR] = {
82 		.subtype = IFM_10G_KR,
83 		.baudrate = IF_Gbps(10ULL),
84 	},
85 	[MLX5E_20GBASE_KR2] = {
86 		.subtype = IFM_20G_KR2,
87 		.baudrate = IF_Gbps(20ULL),
88 	},
89 	[MLX5E_40GBASE_CR4] = {
90 		.subtype = IFM_40G_CR4,
91 		.baudrate = IF_Gbps(40ULL),
92 	},
93 	[MLX5E_40GBASE_KR4] = {
94 		.subtype = IFM_40G_KR4,
95 		.baudrate = IF_Gbps(40ULL),
96 	},
97 	[MLX5E_56GBASE_R4] = {
98 		.subtype = IFM_56G_R4,
99 		.baudrate = IF_Gbps(56ULL),
100 	},
101 	[MLX5E_10GBASE_CR] = {
102 		.subtype = IFM_10G_CR1,
103 		.baudrate = IF_Gbps(10ULL),
104 	},
105 	[MLX5E_10GBASE_SR] = {
106 		.subtype = IFM_10G_SR,
107 		.baudrate = IF_Gbps(10ULL),
108 	},
109 	[MLX5E_10GBASE_LR] = {
110 		.subtype = IFM_10G_LR,
111 		.baudrate = IF_Gbps(10ULL),
112 	},
113 	[MLX5E_40GBASE_SR4] = {
114 		.subtype = IFM_40G_SR4,
115 		.baudrate = IF_Gbps(40ULL),
116 	},
117 	[MLX5E_40GBASE_LR4] = {
118 		.subtype = IFM_40G_LR4,
119 		.baudrate = IF_Gbps(40ULL),
120 	},
121 	[MLX5E_100GBASE_CR4] = {
122 		.subtype = IFM_100G_CR4,
123 		.baudrate = IF_Gbps(100ULL),
124 	},
125 	[MLX5E_100GBASE_SR4] = {
126 		.subtype = IFM_100G_SR4,
127 		.baudrate = IF_Gbps(100ULL),
128 	},
129 	[MLX5E_100GBASE_KR4] = {
130 		.subtype = IFM_100G_KR4,
131 		.baudrate = IF_Gbps(100ULL),
132 	},
133 	[MLX5E_100GBASE_LR4] = {
134 		.subtype = IFM_100G_LR4,
135 		.baudrate = IF_Gbps(100ULL),
136 	},
137 	[MLX5E_100BASE_TX] = {
138 		.subtype = IFM_100_TX,
139 		.baudrate = IF_Mbps(100ULL),
140 	},
141 	[MLX5E_100BASE_T] = {
142 		.subtype = IFM_100_T,
143 		.baudrate = IF_Mbps(100ULL),
144 	},
145 	[MLX5E_10GBASE_T] = {
146 		.subtype = IFM_10G_T,
147 		.baudrate = IF_Gbps(10ULL),
148 	},
149 	[MLX5E_25GBASE_CR] = {
150 		.subtype = IFM_25G_CR,
151 		.baudrate = IF_Gbps(25ULL),
152 	},
153 	[MLX5E_25GBASE_KR] = {
154 		.subtype = IFM_25G_KR,
155 		.baudrate = IF_Gbps(25ULL),
156 	},
157 	[MLX5E_25GBASE_SR] = {
158 		.subtype = IFM_25G_SR,
159 		.baudrate = IF_Gbps(25ULL),
160 	},
161 	[MLX5E_50GBASE_CR2] = {
162 		.subtype = IFM_50G_CR2,
163 		.baudrate = IF_Gbps(50ULL),
164 	},
165 	[MLX5E_50GBASE_KR2] = {
166 		.subtype = IFM_50G_KR2,
167 		.baudrate = IF_Gbps(50ULL),
168 	},
169 };
170 
/* Malloc type used for the driver's malloc(9) allocations in this file. */
MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
172 
173 static void
174 mlx5e_update_carrier(struct mlx5e_priv *priv)
175 {
176 	struct mlx5_core_dev *mdev = priv->mdev;
177 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
178 	u32 eth_proto_oper;
179 	int error;
180 	u8 port_state;
181 	u8 i;
182 
183 	port_state = mlx5_query_vport_state(mdev,
184 	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);
185 
186 	if (port_state == VPORT_STATE_UP) {
187 		priv->media_status_last |= IFM_ACTIVE;
188 	} else {
189 		priv->media_status_last &= ~IFM_ACTIVE;
190 		priv->media_active_last = IFM_ETHER;
191 		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
192 		return;
193 	}
194 
195 	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
196 	if (error) {
197 		priv->media_active_last = IFM_ETHER;
198 		priv->ifp->if_baudrate = 1;
199 		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
200 		    __func__, error);
201 		return;
202 	}
203 	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
204 
205 	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
206 		if (mlx5e_mode_table[i].baudrate == 0)
207 			continue;
208 		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
209 			priv->ifp->if_baudrate =
210 			    mlx5e_mode_table[i].baudrate;
211 			priv->media_active_last =
212 			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
213 		}
214 	}
215 	if_link_state_change(priv->ifp, LINK_STATE_UP);
216 }
217 
218 static void
219 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
220 {
221 	struct mlx5e_priv *priv = dev->if_softc;
222 
223 	ifmr->ifm_status = priv->media_status_last;
224 	ifmr->ifm_active = priv->media_active_last |
225 	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
226 	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
227 
228 }
229 
230 static u32
231 mlx5e_find_link_mode(u32 subtype)
232 {
233 	u32 i;
234 	u32 link_mode = 0;
235 
236 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
237 		if (mlx5e_mode_table[i].baudrate == 0)
238 			continue;
239 		if (mlx5e_mode_table[i].subtype == subtype)
240 			link_mode |= MLX5E_PROT_MASK(i);
241 	}
242 
243 	return (link_mode);
244 }
245 
246 static int
247 mlx5e_media_change(struct ifnet *dev)
248 {
249 	struct mlx5e_priv *priv = dev->if_softc;
250 	struct mlx5_core_dev *mdev = priv->mdev;
251 	u32 eth_proto_cap;
252 	u32 link_mode;
253 	int was_opened;
254 	int locked;
255 	int error;
256 
257 	locked = PRIV_LOCKED(priv);
258 	if (!locked)
259 		PRIV_LOCK(priv);
260 
261 	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
262 		error = EINVAL;
263 		goto done;
264 	}
265 	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
266 
267 	/* query supported capabilities */
268 	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
269 	if (error != 0) {
270 		if_printf(dev, "Query port media capability failed\n");
271 		goto done;
272 	}
273 	/* check for autoselect */
274 	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
275 		link_mode = eth_proto_cap;
276 		if (link_mode == 0) {
277 			if_printf(dev, "Port media capability is zero\n");
278 			error = EINVAL;
279 			goto done;
280 		}
281 	} else {
282 		link_mode = link_mode & eth_proto_cap;
283 		if (link_mode == 0) {
284 			if_printf(dev, "Not supported link mode requested\n");
285 			error = EINVAL;
286 			goto done;
287 		}
288 	}
289 	/* update pauseframe control bits */
290 	priv->params.rx_pauseframe_control =
291 	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
292 	priv->params.tx_pauseframe_control =
293 	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
294 
295 	/* check if device is opened */
296 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
297 
298 	/* reconfigure the hardware */
299 	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
300 	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
301 	mlx5_set_port_pause(mdev, 1,
302 	    priv->params.rx_pauseframe_control,
303 	    priv->params.tx_pauseframe_control);
304 	if (was_opened)
305 		mlx5_set_port_status(mdev, MLX5_PORT_UP);
306 
307 done:
308 	if (!locked)
309 		PRIV_UNLOCK(priv);
310 	return (error);
311 }
312 
313 static void
314 mlx5e_update_carrier_work(struct work_struct *work)
315 {
316 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
317 	    update_carrier_work);
318 
319 	PRIV_LOCK(priv);
320 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
321 		mlx5e_update_carrier(priv);
322 	PRIV_UNLOCK(priv);
323 }
324 
325 static void
326 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
327 {
328 	struct mlx5_core_dev *mdev = priv->mdev;
329 	struct mlx5e_pport_stats *s = &priv->stats.pport;
330 	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
331 	u32 *in;
332 	u32 *out;
333 	u64 *ptr;
334 	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
335 	unsigned x;
336 	unsigned y;
337 
338 	in = mlx5_vzalloc(sz);
339 	out = mlx5_vzalloc(sz);
340 	if (in == NULL || out == NULL)
341 		goto free_out;
342 
343 	ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
344 
345 	MLX5_SET(ppcnt_reg, in, local_port, 1);
346 
347 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
348 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
349 	for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
350 		s->arg[y] = be64toh(ptr[x]);
351 
352 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
353 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
354 	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
355 		s->arg[y] = be64toh(ptr[x]);
356 	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
357 	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
358 		s_debug->arg[y] = be64toh(ptr[x]);
359 
360 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
361 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
362 	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
363 		s_debug->arg[y] = be64toh(ptr[x]);
364 
365 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
366 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
367 	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
368 		s_debug->arg[y] = be64toh(ptr[x]);
369 free_out:
370 	kvfree(in);
371 	kvfree(out);
372 }
373 
374 static void
375 mlx5e_update_stats_work(struct work_struct *work)
376 {
377 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
378 	    update_stats_work);
379 	struct mlx5_core_dev *mdev = priv->mdev;
380 	struct mlx5e_vport_stats *s = &priv->stats.vport;
381 	struct mlx5e_rq_stats *rq_stats;
382 	struct mlx5e_sq_stats *sq_stats;
383 	struct buf_ring *sq_br;
384 #if (__FreeBSD_version < 1100000)
385 	struct ifnet *ifp = priv->ifp;
386 #endif
387 
388 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
389 	u32 *out;
390 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
391 	u64 tso_packets = 0;
392 	u64 tso_bytes = 0;
393 	u64 tx_queue_dropped = 0;
394 	u64 tx_defragged = 0;
395 	u64 tx_offload_none = 0;
396 	u64 lro_packets = 0;
397 	u64 lro_bytes = 0;
398 	u64 sw_lro_queued = 0;
399 	u64 sw_lro_flushed = 0;
400 	u64 rx_csum_none = 0;
401 	u64 rx_wqe_err = 0;
402 	u32 rx_out_of_buffer = 0;
403 	int i;
404 	int j;
405 
406 	PRIV_LOCK(priv);
407 	out = mlx5_vzalloc(outlen);
408 	if (out == NULL)
409 		goto free_out;
410 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
411 		goto free_out;
412 
413 	/* Collect firts the SW counters and then HW for consistency */
414 	for (i = 0; i < priv->params.num_channels; i++) {
415 		struct mlx5e_rq *rq = &priv->channel[i]->rq;
416 
417 		rq_stats = &priv->channel[i]->rq.stats;
418 
419 		/* collect stats from LRO */
420 		rq_stats->sw_lro_queued = rq->lro.lro_queued;
421 		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
422 		sw_lro_queued += rq_stats->sw_lro_queued;
423 		sw_lro_flushed += rq_stats->sw_lro_flushed;
424 		lro_packets += rq_stats->lro_packets;
425 		lro_bytes += rq_stats->lro_bytes;
426 		rx_csum_none += rq_stats->csum_none;
427 		rx_wqe_err += rq_stats->wqe_err;
428 
429 		for (j = 0; j < priv->num_tc; j++) {
430 			sq_stats = &priv->channel[i]->sq[j].stats;
431 			sq_br = priv->channel[i]->sq[j].br;
432 
433 			tso_packets += sq_stats->tso_packets;
434 			tso_bytes += sq_stats->tso_bytes;
435 			tx_queue_dropped += sq_stats->dropped;
436 			tx_queue_dropped += sq_br->br_drops;
437 			tx_defragged += sq_stats->defragged;
438 			tx_offload_none += sq_stats->csum_offload_none;
439 		}
440 	}
441 
442 	/* update counters */
443 	s->tso_packets = tso_packets;
444 	s->tso_bytes = tso_bytes;
445 	s->tx_queue_dropped = tx_queue_dropped;
446 	s->tx_defragged = tx_defragged;
447 	s->lro_packets = lro_packets;
448 	s->lro_bytes = lro_bytes;
449 	s->sw_lro_queued = sw_lro_queued;
450 	s->sw_lro_flushed = sw_lro_flushed;
451 	s->rx_csum_none = rx_csum_none;
452 	s->rx_wqe_err = rx_wqe_err;
453 
454 	/* HW counters */
455 	memset(in, 0, sizeof(in));
456 
457 	MLX5_SET(query_vport_counter_in, in, opcode,
458 	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
459 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
460 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
461 
462 	memset(out, 0, outlen);
463 
464 	/* get number of out-of-buffer drops first */
465 	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
466 	    &rx_out_of_buffer))
467 		goto free_out;
468 
469 	/* accumulate difference into a 64-bit counter */
470 	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
471 	s->rx_out_of_buffer_prev = rx_out_of_buffer;
472 
473 	/* get port statistics */
474 	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
475 		goto free_out;
476 
477 #define	MLX5_GET_CTR(out, x) \
478 	MLX5_GET64(query_vport_counter_out, out, x)
479 
480 	s->rx_error_packets =
481 	    MLX5_GET_CTR(out, received_errors.packets);
482 	s->rx_error_bytes =
483 	    MLX5_GET_CTR(out, received_errors.octets);
484 	s->tx_error_packets =
485 	    MLX5_GET_CTR(out, transmit_errors.packets);
486 	s->tx_error_bytes =
487 	    MLX5_GET_CTR(out, transmit_errors.octets);
488 
489 	s->rx_unicast_packets =
490 	    MLX5_GET_CTR(out, received_eth_unicast.packets);
491 	s->rx_unicast_bytes =
492 	    MLX5_GET_CTR(out, received_eth_unicast.octets);
493 	s->tx_unicast_packets =
494 	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
495 	s->tx_unicast_bytes =
496 	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
497 
498 	s->rx_multicast_packets =
499 	    MLX5_GET_CTR(out, received_eth_multicast.packets);
500 	s->rx_multicast_bytes =
501 	    MLX5_GET_CTR(out, received_eth_multicast.octets);
502 	s->tx_multicast_packets =
503 	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
504 	s->tx_multicast_bytes =
505 	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
506 
507 	s->rx_broadcast_packets =
508 	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
509 	s->rx_broadcast_bytes =
510 	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
511 	s->tx_broadcast_packets =
512 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
513 	s->tx_broadcast_bytes =
514 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
515 
516 	s->rx_packets =
517 	    s->rx_unicast_packets +
518 	    s->rx_multicast_packets +
519 	    s->rx_broadcast_packets -
520 	    s->rx_out_of_buffer;
521 	s->rx_bytes =
522 	    s->rx_unicast_bytes +
523 	    s->rx_multicast_bytes +
524 	    s->rx_broadcast_bytes;
525 	s->tx_packets =
526 	    s->tx_unicast_packets +
527 	    s->tx_multicast_packets +
528 	    s->tx_broadcast_packets;
529 	s->tx_bytes =
530 	    s->tx_unicast_bytes +
531 	    s->tx_multicast_bytes +
532 	    s->tx_broadcast_bytes;
533 
534 	/* Update calculated offload counters */
535 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
536 	s->rx_csum_good = s->rx_packets - s->rx_csum_none;
537 
538 	/* Update per port counters */
539 	mlx5e_update_pport_counters(priv);
540 
541 #if (__FreeBSD_version < 1100000)
542 	/* no get_counters interface in fbsd 10 */
543 	ifp->if_ipackets = s->rx_packets;
544 	ifp->if_ierrors = s->rx_error_packets;
545 	ifp->if_iqdrops = s->rx_out_of_buffer;
546 	ifp->if_opackets = s->tx_packets;
547 	ifp->if_oerrors = s->tx_error_packets;
548 	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
549 	ifp->if_ibytes = s->rx_bytes;
550 	ifp->if_obytes = s->tx_bytes;
551 #endif
552 
553 free_out:
554 	kvfree(out);
555 	PRIV_UNLOCK(priv);
556 }
557 
558 static void
559 mlx5e_update_stats(void *arg)
560 {
561 	struct mlx5e_priv *priv = arg;
562 
563 	schedule_work(&priv->update_stats_work);
564 
565 	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
566 }
567 
568 static void
569 mlx5e_async_event_sub(struct mlx5e_priv *priv,
570     enum mlx5_dev_event event)
571 {
572 	switch (event) {
573 	case MLX5_DEV_EVENT_PORT_UP:
574 	case MLX5_DEV_EVENT_PORT_DOWN:
575 		schedule_work(&priv->update_carrier_work);
576 		break;
577 
578 	default:
579 		break;
580 	}
581 }
582 
583 static void
584 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
585     enum mlx5_dev_event event, unsigned long param)
586 {
587 	struct mlx5e_priv *priv = vpriv;
588 
589 	mtx_lock(&priv->async_events_mtx);
590 	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
591 		mlx5e_async_event_sub(priv, event);
592 	mtx_unlock(&priv->async_events_mtx);
593 }
594 
595 static void
596 mlx5e_enable_async_events(struct mlx5e_priv *priv)
597 {
598 	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
599 }
600 
601 static void
602 mlx5e_disable_async_events(struct mlx5e_priv *priv)
603 {
604 	mtx_lock(&priv->async_events_mtx);
605 	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
606 	mtx_unlock(&priv->async_events_mtx);
607 }
608 
609 static const char *mlx5e_rq_stats_desc[] = {
610 	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
611 };
612 
613 static int
614 mlx5e_create_rq(struct mlx5e_channel *c,
615     struct mlx5e_rq_param *param,
616     struct mlx5e_rq *rq)
617 {
618 	struct mlx5e_priv *priv = c->priv;
619 	struct mlx5_core_dev *mdev = priv->mdev;
620 	char buffer[16];
621 	void *rqc = param->rqc;
622 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
623 	int wq_sz;
624 	int err;
625 	int i;
626 
627 	/* Create DMA descriptor TAG */
628 	if ((err = -bus_dma_tag_create(
629 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
630 	    1,				/* any alignment */
631 	    0,				/* no boundary */
632 	    BUS_SPACE_MAXADDR,		/* lowaddr */
633 	    BUS_SPACE_MAXADDR,		/* highaddr */
634 	    NULL, NULL,			/* filter, filterarg */
635 	    MJUM16BYTES,		/* maxsize */
636 	    1,				/* nsegments */
637 	    MJUM16BYTES,		/* maxsegsize */
638 	    0,				/* flags */
639 	    NULL, NULL,			/* lockfunc, lockfuncarg */
640 	    &rq->dma_tag)))
641 		goto done;
642 
643 	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
644 	    &rq->wq_ctrl);
645 	if (err)
646 		goto err_free_dma_tag;
647 
648 	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
649 
650 	if (priv->params.hw_lro_en) {
651 		rq->wqe_sz = priv->params.lro_wqe_sz;
652 	} else {
653 		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
654 	}
655 	if (rq->wqe_sz > MJUM16BYTES) {
656 		err = -ENOMEM;
657 		goto err_rq_wq_destroy;
658 	} else if (rq->wqe_sz > MJUM9BYTES) {
659 		rq->wqe_sz = MJUM16BYTES;
660 	} else if (rq->wqe_sz > MJUMPAGESIZE) {
661 		rq->wqe_sz = MJUM9BYTES;
662 	} else if (rq->wqe_sz > MCLBYTES) {
663 		rq->wqe_sz = MJUMPAGESIZE;
664 	} else {
665 		rq->wqe_sz = MCLBYTES;
666 	}
667 
668 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
669 	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
670 	if (rq->mbuf == NULL) {
671 		err = -ENOMEM;
672 		goto err_rq_wq_destroy;
673 	}
674 	for (i = 0; i != wq_sz; i++) {
675 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
676 		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
677 
678 		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
679 		if (err != 0) {
680 			while (i--)
681 				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
682 			goto err_rq_mbuf_free;
683 		}
684 		wqe->data.lkey = c->mkey_be;
685 		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
686 	}
687 
688 	rq->pdev = c->pdev;
689 	rq->ifp = c->ifp;
690 	rq->channel = c;
691 	rq->ix = c->ix;
692 
693 	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
694 	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
695 	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
696 	    rq->stats.arg);
697 
698 #ifdef HAVE_TURBO_LRO
699 	if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0)
700 		rq->lro.mbuf = NULL;
701 #else
702 	if (tcp_lro_init(&rq->lro))
703 		rq->lro.lro_cnt = 0;
704 	else
705 		rq->lro.ifp = c->ifp;
706 #endif
707 	return (0);
708 
709 err_rq_mbuf_free:
710 	free(rq->mbuf, M_MLX5EN);
711 err_rq_wq_destroy:
712 	mlx5_wq_destroy(&rq->wq_ctrl);
713 err_free_dma_tag:
714 	bus_dma_tag_destroy(rq->dma_tag);
715 done:
716 	return (err);
717 }
718 
719 static void
720 mlx5e_destroy_rq(struct mlx5e_rq *rq)
721 {
722 	int wq_sz;
723 	int i;
724 
725 	/* destroy all sysctl nodes */
726 	sysctl_ctx_free(&rq->stats.ctx);
727 
728 	/* free leftover LRO packets, if any */
729 #ifdef HAVE_TURBO_LRO
730 	tcp_tlro_free(&rq->lro);
731 #else
732 	tcp_lro_free(&rq->lro);
733 #endif
734 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
735 	for (i = 0; i != wq_sz; i++) {
736 		if (rq->mbuf[i].mbuf != NULL) {
737 			bus_dmamap_unload(rq->dma_tag,
738 			    rq->mbuf[i].dma_map);
739 			m_freem(rq->mbuf[i].mbuf);
740 		}
741 		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
742 	}
743 	free(rq->mbuf, M_MLX5EN);
744 	mlx5_wq_destroy(&rq->wq_ctrl);
745 }
746 
747 static int
748 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
749 {
750 	struct mlx5e_channel *c = rq->channel;
751 	struct mlx5e_priv *priv = c->priv;
752 	struct mlx5_core_dev *mdev = priv->mdev;
753 
754 	void *in;
755 	void *rqc;
756 	void *wq;
757 	int inlen;
758 	int err;
759 
760 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
761 	    sizeof(u64) * rq->wq_ctrl.buf.npages;
762 	in = mlx5_vzalloc(inlen);
763 	if (in == NULL)
764 		return (-ENOMEM);
765 
766 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
767 	wq = MLX5_ADDR_OF(rqc, rqc, wq);
768 
769 	memcpy(rqc, param->rqc, sizeof(param->rqc));
770 
771 	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
772 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
773 	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
774 	if (priv->counter_set_id >= 0)
775 		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
776 	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
777 	    PAGE_SHIFT);
778 	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
779 
780 	mlx5_fill_page_array(&rq->wq_ctrl.buf,
781 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
782 
783 	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
784 
785 	kvfree(in);
786 
787 	return (err);
788 }
789 
790 static int
791 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
792 {
793 	struct mlx5e_channel *c = rq->channel;
794 	struct mlx5e_priv *priv = c->priv;
795 	struct mlx5_core_dev *mdev = priv->mdev;
796 
797 	void *in;
798 	void *rqc;
799 	int inlen;
800 	int err;
801 
802 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
803 	in = mlx5_vzalloc(inlen);
804 	if (in == NULL)
805 		return (-ENOMEM);
806 
807 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
808 
809 	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
810 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
811 	MLX5_SET(rqc, rqc, state, next_state);
812 
813 	err = mlx5_core_modify_rq(mdev, in, inlen);
814 
815 	kvfree(in);
816 
817 	return (err);
818 }
819 
820 static void
821 mlx5e_disable_rq(struct mlx5e_rq *rq)
822 {
823 	struct mlx5e_channel *c = rq->channel;
824 	struct mlx5e_priv *priv = c->priv;
825 	struct mlx5_core_dev *mdev = priv->mdev;
826 
827 	mlx5_core_destroy_rq(mdev, rq->rqn);
828 }
829 
830 static int
831 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
832 {
833 	struct mlx5e_channel *c = rq->channel;
834 	struct mlx5e_priv *priv = c->priv;
835 	struct mlx5_wq_ll *wq = &rq->wq;
836 	int i;
837 
838 	for (i = 0; i < 1000; i++) {
839 		if (wq->cur_sz >= priv->params.min_rx_wqes)
840 			return (0);
841 
842 		msleep(4);
843 	}
844 	return (-ETIMEDOUT);
845 }
846 
847 static int
848 mlx5e_open_rq(struct mlx5e_channel *c,
849     struct mlx5e_rq_param *param,
850     struct mlx5e_rq *rq)
851 {
852 	int err;
853 
854 	err = mlx5e_create_rq(c, param, rq);
855 	if (err)
856 		return (err);
857 
858 	err = mlx5e_enable_rq(rq, param);
859 	if (err)
860 		goto err_destroy_rq;
861 
862 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
863 	if (err)
864 		goto err_disable_rq;
865 
866 	c->rq.enabled = 1;
867 
868 	return (0);
869 
870 err_disable_rq:
871 	mlx5e_disable_rq(rq);
872 err_destroy_rq:
873 	mlx5e_destroy_rq(rq);
874 
875 	return (err);
876 }
877 
878 static void
879 mlx5e_close_rq(struct mlx5e_rq *rq)
880 {
881 	rq->enabled = 0;
882 	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
883 }
884 
885 static void
886 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
887 {
888 	/* wait till RQ is empty */
889 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
890 		msleep(4);
891 		rq->cq.mcq.comp(&rq->cq.mcq);
892 	}
893 
894 	mlx5e_disable_rq(rq);
895 	mlx5e_destroy_rq(rq);
896 }
897 
898 static void
899 mlx5e_free_sq_db(struct mlx5e_sq *sq)
900 {
901 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
902 	int x;
903 
904 	for (x = 0; x != wq_sz; x++)
905 		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
906 	free(sq->mbuf, M_MLX5EN);
907 }
908 
909 static int
910 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
911 {
912 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
913 	int err;
914 	int x;
915 
916 	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
917 	if (sq->mbuf == NULL)
918 		return (-ENOMEM);
919 
920 	/* Create DMA descriptor MAPs */
921 	for (x = 0; x != wq_sz; x++) {
922 		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
923 		if (err != 0) {
924 			while (x--)
925 				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
926 			free(sq->mbuf, M_MLX5EN);
927 			return (err);
928 		}
929 	}
930 	return (0);
931 }
932 
933 static const char *mlx5e_sq_stats_desc[] = {
934 	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
935 };
936 
937 static int
938 mlx5e_create_sq(struct mlx5e_channel *c,
939     int tc,
940     struct mlx5e_sq_param *param,
941     struct mlx5e_sq *sq)
942 {
943 	struct mlx5e_priv *priv = c->priv;
944 	struct mlx5_core_dev *mdev = priv->mdev;
945 	char buffer[16];
946 
947 	void *sqc = param->sqc;
948 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
949 #ifdef RSS
950 	cpuset_t cpu_mask;
951 	int cpu_id;
952 #endif
953 	int err;
954 
955 	/* Create DMA descriptor TAG */
956 	if ((err = -bus_dma_tag_create(
957 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
958 	    1,				/* any alignment */
959 	    0,				/* no boundary */
960 	    BUS_SPACE_MAXADDR,		/* lowaddr */
961 	    BUS_SPACE_MAXADDR,		/* highaddr */
962 	    NULL, NULL,			/* filter, filterarg */
963 	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
964 	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
965 	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
966 	    0,				/* flags */
967 	    NULL, NULL,			/* lockfunc, lockfuncarg */
968 	    &sq->dma_tag)))
969 		goto done;
970 
971 	err = mlx5_alloc_map_uar(mdev, &sq->uar);
972 	if (err)
973 		goto err_free_dma_tag;
974 
975 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
976 	    &sq->wq_ctrl);
977 	if (err)
978 		goto err_unmap_free_uar;
979 
980 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
981 	sq->uar_map = sq->uar.map;
982 	sq->uar_bf_map = sq->uar.bf_map;
983 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
984 
985 	err = mlx5e_alloc_sq_db(sq);
986 	if (err)
987 		goto err_sq_wq_destroy;
988 
989 	sq->pdev = c->pdev;
990 	sq->mkey_be = c->mkey_be;
991 	sq->channel = c;
992 	sq->tc = tc;
993 
994 	sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
995 	    M_WAITOK, &sq->lock);
996 	if (sq->br == NULL) {
997 		if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
998 		    __func__);
999 		err = -ENOMEM;
1000 		goto err_free_sq_db;
1001 	}
1002 
1003 	sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
1004 	    taskqueue_thread_enqueue, &sq->sq_tq);
1005 	if (sq->sq_tq == NULL) {
1006 		if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
1007 		    __func__);
1008 		err = -ENOMEM;
1009 		goto err_free_drbr;
1010 	}
1011 
1012 	TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
1013 #ifdef RSS
1014 	cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
1015 	CPU_SETOF(cpu_id, &cpu_mask);
1016 	taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
1017 	    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
1018 #else
1019 	taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
1020 	    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
1021 #endif
1022 	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1023 	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1024 	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1025 	    sq->stats.arg);
1026 
1027 	return (0);
1028 
1029 err_free_drbr:
1030 	buf_ring_free(sq->br, M_MLX5EN);
1031 err_free_sq_db:
1032 	mlx5e_free_sq_db(sq);
1033 err_sq_wq_destroy:
1034 	mlx5_wq_destroy(&sq->wq_ctrl);
1035 
1036 err_unmap_free_uar:
1037 	mlx5_unmap_free_uar(mdev, &sq->uar);
1038 
1039 err_free_dma_tag:
1040 	bus_dma_tag_destroy(sq->dma_tag);
1041 done:
1042 	return (err);
1043 }
1044 
1045 static void
1046 mlx5e_destroy_sq(struct mlx5e_sq *sq)
1047 {
1048 	struct mlx5e_channel *c = sq->channel;
1049 	struct mlx5e_priv *priv = c->priv;
1050 
1051 	/* destroy all sysctl nodes */
1052 	sysctl_ctx_free(&sq->stats.ctx);
1053 
1054 	mlx5e_free_sq_db(sq);
1055 	mlx5_wq_destroy(&sq->wq_ctrl);
1056 	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
1057 	taskqueue_drain(sq->sq_tq, &sq->sq_task);
1058 	taskqueue_free(sq->sq_tq);
1059 	buf_ring_free(sq->br, M_MLX5EN);
1060 }
1061 
1062 static int
1063 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
1064 {
1065 	struct mlx5e_channel *c = sq->channel;
1066 	struct mlx5e_priv *priv = c->priv;
1067 	struct mlx5_core_dev *mdev = priv->mdev;
1068 
1069 	void *in;
1070 	void *sqc;
1071 	void *wq;
1072 	int inlen;
1073 	int err;
1074 
1075 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1076 	    sizeof(u64) * sq->wq_ctrl.buf.npages;
1077 	in = mlx5_vzalloc(inlen);
1078 	if (in == NULL)
1079 		return (-ENOMEM);
1080 
1081 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1082 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
1083 
1084 	memcpy(sqc, param->sqc, sizeof(param->sqc));
1085 
1086 	MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]);
1087 	MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn);
1088 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1089 	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1090 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1091 
1092 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1093 	MLX5_SET(wq, wq, uar_page, sq->uar.index);
1094 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1095 	    PAGE_SHIFT);
1096 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1097 
1098 	mlx5_fill_page_array(&sq->wq_ctrl.buf,
1099 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1100 
1101 	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
1102 
1103 	kvfree(in);
1104 
1105 	return (err);
1106 }
1107 
1108 static int
1109 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1110 {
1111 	struct mlx5e_channel *c = sq->channel;
1112 	struct mlx5e_priv *priv = c->priv;
1113 	struct mlx5_core_dev *mdev = priv->mdev;
1114 
1115 	void *in;
1116 	void *sqc;
1117 	int inlen;
1118 	int err;
1119 
1120 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1121 	in = mlx5_vzalloc(inlen);
1122 	if (in == NULL)
1123 		return (-ENOMEM);
1124 
1125 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1126 
1127 	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1128 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1129 	MLX5_SET(sqc, sqc, state, next_state);
1130 
1131 	err = mlx5_core_modify_sq(mdev, in, inlen);
1132 
1133 	kvfree(in);
1134 
1135 	return (err);
1136 }
1137 
1138 static void
1139 mlx5e_disable_sq(struct mlx5e_sq *sq)
1140 {
1141 	struct mlx5e_channel *c = sq->channel;
1142 	struct mlx5e_priv *priv = c->priv;
1143 	struct mlx5_core_dev *mdev = priv->mdev;
1144 
1145 	mlx5_core_destroy_sq(mdev, sq->sqn);
1146 }
1147 
1148 static int
1149 mlx5e_open_sq(struct mlx5e_channel *c,
1150     int tc,
1151     struct mlx5e_sq_param *param,
1152     struct mlx5e_sq *sq)
1153 {
1154 	int err;
1155 
1156 	err = mlx5e_create_sq(c, tc, param, sq);
1157 	if (err)
1158 		return (err);
1159 
1160 	err = mlx5e_enable_sq(sq, param);
1161 	if (err)
1162 		goto err_destroy_sq;
1163 
1164 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1165 	if (err)
1166 		goto err_disable_sq;
1167 
1168 	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);
1169 
1170 	return (0);
1171 
1172 err_disable_sq:
1173 	mlx5e_disable_sq(sq);
1174 err_destroy_sq:
1175 	mlx5e_destroy_sq(sq);
1176 
1177 	return (err);
1178 }
1179 
1180 static void
1181 mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
1182 {
1183 	/* fill up remainder with NOPs */
1184 	while (sq->cev_counter != 0) {
1185 		while (!mlx5e_sq_has_room_for(sq, 1)) {
1186 			if (can_sleep != 0) {
1187 				mtx_unlock(&sq->lock);
1188 				msleep(4);
1189 				mtx_lock(&sq->lock);
1190 			} else {
1191 				goto done;
1192 			}
1193 		}
1194 		/* send a single NOP */
1195 		mlx5e_send_nop(sq, 1);
1196 		wmb();
1197 	}
1198 done:
1199 	/* Check if we need to write the doorbell */
1200 	if (likely(sq->doorbell.d64 != 0)) {
1201 		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
1202 		sq->doorbell.d64 = 0;
1203 	}
1204 	return;
1205 }
1206 
1207 void
1208 mlx5e_sq_cev_timeout(void *arg)
1209 {
1210 	struct mlx5e_sq *sq = arg;
1211 
1212 	mtx_assert(&sq->lock, MA_OWNED);
1213 
1214 	/* check next state */
1215 	switch (sq->cev_next_state) {
1216 	case MLX5E_CEV_STATE_SEND_NOPS:
1217 		/* fill TX ring with NOPs, if any */
1218 		mlx5e_sq_send_nops_locked(sq, 0);
1219 
1220 		/* check if completed */
1221 		if (sq->cev_counter == 0) {
1222 			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1223 			return;
1224 		}
1225 		break;
1226 	default:
1227 		/* send NOPs on next timeout */
1228 		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1229 		break;
1230 	}
1231 
1232 	/* restart timer */
1233 	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1234 }
1235 
1236 static void
1237 mlx5e_close_sq_wait(struct mlx5e_sq *sq)
1238 {
1239 
1240 	mtx_lock(&sq->lock);
1241 	/* teardown event factor timer, if any */
1242 	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1243 	callout_stop(&sq->cev_callout);
1244 
1245 	/* send dummy NOPs in order to flush the transmit ring */
1246 	mlx5e_sq_send_nops_locked(sq, 1);
1247 	mtx_unlock(&sq->lock);
1248 
1249 	/* make sure it is safe to free the callout */
1250 	callout_drain(&sq->cev_callout);
1251 
1252 	/* error out remaining requests */
1253 	mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1254 
1255 	/* wait till SQ is empty */
1256 	mtx_lock(&sq->lock);
1257 	while (sq->cc != sq->pc) {
1258 		mtx_unlock(&sq->lock);
1259 		msleep(4);
1260 		sq->cq.mcq.comp(&sq->cq.mcq);
1261 		mtx_lock(&sq->lock);
1262 	}
1263 	mtx_unlock(&sq->lock);
1264 
1265 	mlx5e_disable_sq(sq);
1266 	mlx5e_destroy_sq(sq);
1267 }
1268 
1269 static int
1270 mlx5e_create_cq(struct mlx5e_channel *c,
1271     struct mlx5e_cq_param *param,
1272     struct mlx5e_cq *cq,
1273     mlx5e_cq_comp_t *comp)
1274 {
1275 	struct mlx5e_priv *priv = c->priv;
1276 	struct mlx5_core_dev *mdev = priv->mdev;
1277 	struct mlx5_core_cq *mcq = &cq->mcq;
1278 	int eqn_not_used;
1279 	int irqn;
1280 	int err;
1281 	u32 i;
1282 
1283 	param->wq.buf_numa_node = 0;
1284 	param->wq.db_numa_node = 0;
1285 	param->eq_ix = c->ix;
1286 
1287 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1288 	    &cq->wq_ctrl);
1289 	if (err)
1290 		return (err);
1291 
1292 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
1293 
1294 	mcq->cqe_sz = 64;
1295 	mcq->set_ci_db = cq->wq_ctrl.db.db;
1296 	mcq->arm_db = cq->wq_ctrl.db.db + 1;
1297 	*mcq->set_ci_db = 0;
1298 	*mcq->arm_db = 0;
1299 	mcq->vector = param->eq_ix;
1300 	mcq->comp = comp;
1301 	mcq->event = mlx5e_cq_error_event;
1302 	mcq->irqn = irqn;
1303 	mcq->uar = &priv->cq_uar;
1304 
1305 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1306 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1307 
1308 		cqe->op_own = 0xf1;
1309 	}
1310 
1311 	cq->channel = c;
1312 
1313 	return (0);
1314 }
1315 
1316 static void
1317 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1318 {
1319 	mlx5_wq_destroy(&cq->wq_ctrl);
1320 }
1321 
1322 static int
1323 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
1324     u8 moderation_mode)
1325 {
1326 	struct mlx5e_channel *c = cq->channel;
1327 	struct mlx5e_priv *priv = c->priv;
1328 	struct mlx5_core_dev *mdev = priv->mdev;
1329 	struct mlx5_core_cq *mcq = &cq->mcq;
1330 	void *in;
1331 	void *cqc;
1332 	int inlen;
1333 	int irqn_not_used;
1334 	int eqn;
1335 	int err;
1336 
1337 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1338 	    sizeof(u64) * cq->wq_ctrl.buf.npages;
1339 	in = mlx5_vzalloc(inlen);
1340 	if (in == NULL)
1341 		return (-ENOMEM);
1342 
1343 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1344 
1345 	memcpy(cqc, param->cqc, sizeof(param->cqc));
1346 
1347 	mlx5_fill_page_array(&cq->wq_ctrl.buf,
1348 	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1349 
1350 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
1351 
1352 	MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
1353 	MLX5_SET(cqc, cqc, c_eqn, eqn);
1354 	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1355 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1356 	    PAGE_SHIFT);
1357 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1358 
1359 	err = mlx5_core_create_cq(mdev, mcq, in, inlen);
1360 
1361 	kvfree(in);
1362 
1363 	if (err)
1364 		return (err);
1365 
1366 	mlx5e_cq_arm(cq);
1367 
1368 	return (0);
1369 }
1370 
1371 static void
1372 mlx5e_disable_cq(struct mlx5e_cq *cq)
1373 {
1374 	struct mlx5e_channel *c = cq->channel;
1375 	struct mlx5e_priv *priv = c->priv;
1376 	struct mlx5_core_dev *mdev = priv->mdev;
1377 
1378 	mlx5_core_destroy_cq(mdev, &cq->mcq);
1379 }
1380 
1381 static int
1382 mlx5e_open_cq(struct mlx5e_channel *c,
1383     struct mlx5e_cq_param *param,
1384     struct mlx5e_cq *cq,
1385     mlx5e_cq_comp_t *comp,
1386     u8 moderation_mode)
1387 {
1388 	int err;
1389 
1390 	err = mlx5e_create_cq(c, param, cq, comp);
1391 	if (err)
1392 		return (err);
1393 
1394 	err = mlx5e_enable_cq(cq, param, moderation_mode);
1395 	if (err)
1396 		goto err_destroy_cq;
1397 
1398 	return (0);
1399 
1400 err_destroy_cq:
1401 	mlx5e_destroy_cq(cq);
1402 
1403 	return (err);
1404 }
1405 
/*
 * Tear down a completion queue opened by mlx5e_open_cq(): destroy the
 * queue object first, then free its work queue.
 */
static void
mlx5e_close_cq(struct mlx5e_cq *cq)
{

	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}
1412 
1413 static int
1414 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1415     struct mlx5e_channel_param *cparam)
1416 {
1417 	u8 tx_moderation_mode;
1418 	int err;
1419 	int tc;
1420 
1421 	switch (c->priv->params.tx_cq_moderation_mode) {
1422 	case 0:
1423 		tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1424 		break;
1425 	default:
1426 		if (MLX5_CAP_GEN(c->priv->mdev, cq_period_start_from_cqe))
1427 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1428 		else
1429 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1430 		break;
1431 	}
1432 	for (tc = 0; tc < c->num_tc; tc++) {
1433 		/* open completion queue */
1434 		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
1435 		    &mlx5e_tx_cq_comp, tx_moderation_mode);
1436 		if (err)
1437 			goto err_close_tx_cqs;
1438 	}
1439 	return (0);
1440 
1441 err_close_tx_cqs:
1442 	for (tc--; tc >= 0; tc--)
1443 		mlx5e_close_cq(&c->sq[tc].cq);
1444 
1445 	return (err);
1446 }
1447 
1448 static void
1449 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1450 {
1451 	int tc;
1452 
1453 	for (tc = 0; tc < c->num_tc; tc++)
1454 		mlx5e_close_cq(&c->sq[tc].cq);
1455 }
1456 
1457 static int
1458 mlx5e_open_sqs(struct mlx5e_channel *c,
1459     struct mlx5e_channel_param *cparam)
1460 {
1461 	int err;
1462 	int tc;
1463 
1464 	for (tc = 0; tc < c->num_tc; tc++) {
1465 		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1466 		if (err)
1467 			goto err_close_sqs;
1468 	}
1469 
1470 	return (0);
1471 
1472 err_close_sqs:
1473 	for (tc--; tc >= 0; tc--)
1474 		mlx5e_close_sq_wait(&c->sq[tc]);
1475 
1476 	return (err);
1477 }
1478 
1479 static void
1480 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1481 {
1482 	int tc;
1483 
1484 	for (tc = 0; tc < c->num_tc; tc++)
1485 		mlx5e_close_sq_wait(&c->sq[tc]);
1486 }
1487 
1488 static void
1489 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1490 {
1491 	int tc;
1492 
1493 	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1494 
1495 	for (tc = 0; tc < c->num_tc; tc++) {
1496 		struct mlx5e_sq *sq = c->sq + tc;
1497 
1498 		mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
1499 		mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
1500 		    MTX_DEF);
1501 
1502 		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1503 
1504 		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1505 
1506 		/* ensure the TX completion event factor is not zero */
1507 		if (sq->cev_factor == 0)
1508 			sq->cev_factor = 1;
1509 	}
1510 }
1511 
1512 static void
1513 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1514 {
1515 	int tc;
1516 
1517 	mtx_destroy(&c->rq.mtx);
1518 
1519 	for (tc = 0; tc < c->num_tc; tc++) {
1520 		mtx_destroy(&c->sq[tc].lock);
1521 		mtx_destroy(&c->sq[tc].comp_lock);
1522 	}
1523 }
1524 
1525 static int
1526 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1527     struct mlx5e_channel_param *cparam,
1528     struct mlx5e_channel *volatile *cp)
1529 {
1530 	struct mlx5e_channel *c;
1531 	u8 rx_moderation_mode;
1532 	int err;
1533 
1534 	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1535 	if (c == NULL)
1536 		return (-ENOMEM);
1537 
1538 	c->priv = priv;
1539 	c->ix = ix;
1540 	c->cpu = 0;
1541 	c->pdev = &priv->mdev->pdev->dev;
1542 	c->ifp = priv->ifp;
1543 	c->mkey_be = cpu_to_be32(priv->mr.key);
1544 	c->num_tc = priv->num_tc;
1545 
1546 	/* init mutexes */
1547 	mlx5e_chan_mtx_init(c);
1548 
1549 	/* open transmit completion queue */
1550 	err = mlx5e_open_tx_cqs(c, cparam);
1551 	if (err)
1552 		goto err_free;
1553 
1554 	switch (priv->params.rx_cq_moderation_mode) {
1555 	case 0:
1556 		rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1557 		break;
1558 	default:
1559 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1560 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1561 		else
1562 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1563 		break;
1564 	}
1565 
1566 	/* open receive completion queue */
1567 	err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
1568 	    &mlx5e_rx_cq_comp, rx_moderation_mode);
1569 	if (err)
1570 		goto err_close_tx_cqs;
1571 
1572 	err = mlx5e_open_sqs(c, cparam);
1573 	if (err)
1574 		goto err_close_rx_cq;
1575 
1576 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1577 	if (err)
1578 		goto err_close_sqs;
1579 
1580 	/* store channel pointer */
1581 	*cp = c;
1582 
1583 	/* poll receive queue initially */
1584 	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1585 
1586 	return (0);
1587 
1588 err_close_sqs:
1589 	mlx5e_close_sqs_wait(c);
1590 
1591 err_close_rx_cq:
1592 	mlx5e_close_cq(&c->rq.cq);
1593 
1594 err_close_tx_cqs:
1595 	mlx5e_close_tx_cqs(c);
1596 
1597 err_free:
1598 	/* destroy mutexes */
1599 	mlx5e_chan_mtx_destroy(c);
1600 	free(c, M_MLX5EN);
1601 	return (err);
1602 }
1603 
1604 static void
1605 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1606 {
1607 	struct mlx5e_channel *c = *pp;
1608 
1609 	/* check if channel is already closed */
1610 	if (c == NULL)
1611 		return;
1612 	mlx5e_close_rq(&c->rq);
1613 }
1614 
1615 static void
1616 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1617 {
1618 	struct mlx5e_channel *c = *pp;
1619 
1620 	/* check if channel is already closed */
1621 	if (c == NULL)
1622 		return;
1623 	/* ensure channel pointer is no longer used */
1624 	*pp = NULL;
1625 
1626 	mlx5e_close_rq_wait(&c->rq);
1627 	mlx5e_close_sqs_wait(c);
1628 	mlx5e_close_cq(&c->rq.cq);
1629 	mlx5e_close_tx_cqs(c);
1630 	/* destroy mutexes */
1631 	mlx5e_chan_mtx_destroy(c);
1632 	free(c, M_MLX5EN);
1633 }
1634 
1635 static void
1636 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1637     struct mlx5e_rq_param *param)
1638 {
1639 	void *rqc = param->rqc;
1640 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1641 
1642 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1643 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1644 	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
1645 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1646 	MLX5_SET(wq, wq, pd, priv->pdn);
1647 
1648 	param->wq.buf_numa_node = 0;
1649 	param->wq.db_numa_node = 0;
1650 	param->wq.linear = 1;
1651 }
1652 
1653 static void
1654 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1655     struct mlx5e_sq_param *param)
1656 {
1657 	void *sqc = param->sqc;
1658 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1659 
1660 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1661 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1662 	MLX5_SET(wq, wq, pd, priv->pdn);
1663 
1664 	param->wq.buf_numa_node = 0;
1665 	param->wq.db_numa_node = 0;
1666 	param->wq.linear = 1;
1667 }
1668 
1669 static void
1670 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1671     struct mlx5e_cq_param *param)
1672 {
1673 	void *cqc = param->cqc;
1674 
1675 	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1676 }
1677 
1678 static void
1679 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1680     struct mlx5e_cq_param *param)
1681 {
1682 	void *cqc = param->cqc;
1683 
1684 
1685 	/*
1686 	 * TODO The sysctl to control on/off is a bool value for now, which means
1687 	 * we only support CSUM, once HASH is implemnted we'll need to address that.
1688 	 */
1689 	if (priv->params.cqe_zipping_en) {
1690 		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1691 		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1692 	}
1693 
1694 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1695 	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1696 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1697 
1698 	mlx5e_build_common_cq_param(priv, param);
1699 }
1700 
1701 static void
1702 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1703     struct mlx5e_cq_param *param)
1704 {
1705 	void *cqc = param->cqc;
1706 
1707 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1708 	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1709 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1710 
1711 	mlx5e_build_common_cq_param(priv, param);
1712 }
1713 
1714 static void
1715 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1716     struct mlx5e_channel_param *cparam)
1717 {
1718 	memset(cparam, 0, sizeof(*cparam));
1719 
1720 	mlx5e_build_rq_param(priv, &cparam->rq);
1721 	mlx5e_build_sq_param(priv, &cparam->sq);
1722 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1723 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1724 }
1725 
1726 static int
1727 mlx5e_open_channels(struct mlx5e_priv *priv)
1728 {
1729 	struct mlx5e_channel_param cparam;
1730 	void *ptr;
1731 	int err;
1732 	int i;
1733 	int j;
1734 
1735 	priv->channel = malloc(priv->params.num_channels *
1736 	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1737 	if (priv->channel == NULL)
1738 		return (-ENOMEM);
1739 
1740 	mlx5e_build_channel_param(priv, &cparam);
1741 	for (i = 0; i < priv->params.num_channels; i++) {
1742 		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1743 		if (err)
1744 			goto err_close_channels;
1745 	}
1746 
1747 	for (j = 0; j < priv->params.num_channels; j++) {
1748 		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1749 		if (err)
1750 			goto err_close_channels;
1751 	}
1752 
1753 	return (0);
1754 
1755 err_close_channels:
1756 	for (i--; i >= 0; i--) {
1757 		mlx5e_close_channel(&priv->channel[i]);
1758 		mlx5e_close_channel_wait(&priv->channel[i]);
1759 	}
1760 
1761 	/* remove "volatile" attribute from "channel" pointer */
1762 	ptr = __DECONST(void *, priv->channel);
1763 	priv->channel = NULL;
1764 
1765 	free(ptr, M_MLX5EN);
1766 
1767 	return (err);
1768 }
1769 
1770 static void
1771 mlx5e_close_channels(struct mlx5e_priv *priv)
1772 {
1773 	void *ptr;
1774 	int i;
1775 
1776 	if (priv->channel == NULL)
1777 		return;
1778 
1779 	for (i = 0; i < priv->params.num_channels; i++)
1780 		mlx5e_close_channel(&priv->channel[i]);
1781 	for (i = 0; i < priv->params.num_channels; i++)
1782 		mlx5e_close_channel_wait(&priv->channel[i]);
1783 
1784 	/* remove "volatile" attribute from "channel" pointer */
1785 	ptr = __DECONST(void *, priv->channel);
1786 	priv->channel = NULL;
1787 
1788 	free(ptr, M_MLX5EN);
1789 }
1790 
1791 static int
1792 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1793 {
1794 	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
1795 	    priv->params.tx_cq_moderation_usec,
1796 	    priv->params.tx_cq_moderation_pkts));
1797 }
1798 
1799 static int
1800 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
1801 {
1802 	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
1803 	    priv->params.rx_cq_moderation_usec,
1804 	    priv->params.rx_cq_moderation_pkts));
1805 }
1806 
1807 static int
1808 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1809 {
1810 	int err;
1811 	int i;
1812 
1813 	if (c == NULL)
1814 		return (EINVAL);
1815 
1816 	err = mlx5e_refresh_rq_params(priv, &c->rq);
1817 	if (err)
1818 		goto done;
1819 
1820 	for (i = 0; i != c->num_tc; i++) {
1821 		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
1822 		if (err)
1823 			goto done;
1824 	}
1825 done:
1826 	return (err);
1827 }
1828 
1829 int
1830 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
1831 {
1832 	int i;
1833 
1834 	if (priv->channel == NULL)
1835 		return (EINVAL);
1836 
1837 	for (i = 0; i < priv->params.num_channels; i++) {
1838 		int err;
1839 
1840 		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
1841 		if (err)
1842 			return (err);
1843 	}
1844 	return (0);
1845 }
1846 
1847 static int
1848 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
1849 {
1850 	struct mlx5_core_dev *mdev = priv->mdev;
1851 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
1852 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1853 
1854 	memset(in, 0, sizeof(in));
1855 
1856 	MLX5_SET(tisc, tisc, prio, tc);
1857 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
1858 
1859 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
1860 }
1861 
1862 static void
1863 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
1864 {
1865 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
1866 }
1867 
1868 static int
1869 mlx5e_open_tises(struct mlx5e_priv *priv)
1870 {
1871 	int num_tc = priv->num_tc;
1872 	int err;
1873 	int tc;
1874 
1875 	for (tc = 0; tc < num_tc; tc++) {
1876 		err = mlx5e_open_tis(priv, tc);
1877 		if (err)
1878 			goto err_close_tises;
1879 	}
1880 
1881 	return (0);
1882 
1883 err_close_tises:
1884 	for (tc--; tc >= 0; tc--)
1885 		mlx5e_close_tis(priv, tc);
1886 
1887 	return (err);
1888 }
1889 
1890 static void
1891 mlx5e_close_tises(struct mlx5e_priv *priv)
1892 {
1893 	int num_tc = priv->num_tc;
1894 	int tc;
1895 
1896 	for (tc = 0; tc < num_tc; tc++)
1897 		mlx5e_close_tis(priv, tc);
1898 }
1899 
1900 static int
1901 mlx5e_open_rqt(struct mlx5e_priv *priv)
1902 {
1903 	struct mlx5_core_dev *mdev = priv->mdev;
1904 	u32 *in;
1905 	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
1906 	void *rqtc;
1907 	int inlen;
1908 	int err;
1909 	int sz;
1910 	int i;
1911 
1912 	sz = 1 << priv->params.rx_hash_log_tbl_sz;
1913 
1914 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
1915 	in = mlx5_vzalloc(inlen);
1916 	if (in == NULL)
1917 		return (-ENOMEM);
1918 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1919 
1920 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
1921 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
1922 
1923 	for (i = 0; i < sz; i++) {
1924 		int ix;
1925 #ifdef RSS
1926 		ix = rss_get_indirection_to_bucket(i);
1927 #else
1928 		ix = i;
1929 #endif
1930 		/* ensure we don't overflow */
1931 		ix %= priv->params.num_channels;
1932 		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
1933 	}
1934 
1935 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
1936 
1937 	memset(out, 0, sizeof(out));
1938 	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
1939 	if (!err)
1940 		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
1941 
1942 	kvfree(in);
1943 
1944 	return (err);
1945 }
1946 
1947 static void
1948 mlx5e_close_rqt(struct mlx5e_priv *priv)
1949 {
1950 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
1951 	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
1952 
1953 	memset(in, 0, sizeof(in));
1954 
1955 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
1956 	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
1957 
1958 	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
1959 	    sizeof(out));
1960 }
1961 
1962 static void
1963 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
1964 {
1965 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1966 	__be32 *hkey;
1967 
1968 	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
1969 
1970 #define	ROUGH_MAX_L2_L3_HDR_SZ 256
1971 
1972 #define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1973 			  MLX5_HASH_FIELD_SEL_DST_IP)
1974 
1975 #define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1976 			  MLX5_HASH_FIELD_SEL_DST_IP   |\
1977 			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
1978 			  MLX5_HASH_FIELD_SEL_L4_DPORT)
1979 
1980 #define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
1981 				 MLX5_HASH_FIELD_SEL_DST_IP   |\
1982 				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
1983 
1984 	if (priv->params.hw_lro_en) {
1985 		MLX5_SET(tirc, tirc, lro_enable_mask,
1986 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
1987 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
1988 		MLX5_SET(tirc, tirc, lro_max_msg_sz,
1989 		    (priv->params.lro_wqe_sz -
1990 		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
1991 		/* TODO: add the option to choose timer value dynamically */
1992 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
1993 		    MLX5_CAP_ETH(priv->mdev,
1994 		    lro_timer_supported_periods[2]));
1995 	}
1996 
1997 	/* setup parameters for hashing TIR type, if any */
1998 	switch (tt) {
1999 	case MLX5E_TT_ANY:
2000 		MLX5_SET(tirc, tirc, disp_type,
2001 		    MLX5_TIRC_DISP_TYPE_DIRECT);
2002 		MLX5_SET(tirc, tirc, inline_rqn,
2003 		    priv->channel[0]->rq.rqn);
2004 		break;
2005 	default:
2006 		MLX5_SET(tirc, tirc, disp_type,
2007 		    MLX5_TIRC_DISP_TYPE_INDIRECT);
2008 		MLX5_SET(tirc, tirc, indirect_table,
2009 		    priv->rqtn);
2010 		MLX5_SET(tirc, tirc, rx_hash_fn,
2011 		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2012 		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2013 #ifdef RSS
2014 		/*
2015 		 * The FreeBSD RSS implementation does currently not
2016 		 * support symmetric Toeplitz hashes:
2017 		 */
2018 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2019 		rss_getkey((uint8_t *)hkey);
2020 #else
2021 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2022 		hkey[0] = cpu_to_be32(0xD181C62C);
2023 		hkey[1] = cpu_to_be32(0xF7F4DB5B);
2024 		hkey[2] = cpu_to_be32(0x1983A2FC);
2025 		hkey[3] = cpu_to_be32(0x943E1ADB);
2026 		hkey[4] = cpu_to_be32(0xD9389E6B);
2027 		hkey[5] = cpu_to_be32(0xD1039C2C);
2028 		hkey[6] = cpu_to_be32(0xA74499AD);
2029 		hkey[7] = cpu_to_be32(0x593D56D9);
2030 		hkey[8] = cpu_to_be32(0xF3253C06);
2031 		hkey[9] = cpu_to_be32(0x2ADC1FFC);
2032 #endif
2033 		break;
2034 	}
2035 
2036 	switch (tt) {
2037 	case MLX5E_TT_IPV4_TCP:
2038 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2039 		    MLX5_L3_PROT_TYPE_IPV4);
2040 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2041 		    MLX5_L4_PROT_TYPE_TCP);
2042 #ifdef RSS
2043 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2044 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2045 			    MLX5_HASH_IP);
2046 		} else
2047 #endif
2048 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2049 		    MLX5_HASH_ALL);
2050 		break;
2051 
2052 	case MLX5E_TT_IPV6_TCP:
2053 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2054 		    MLX5_L3_PROT_TYPE_IPV6);
2055 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2056 		    MLX5_L4_PROT_TYPE_TCP);
2057 #ifdef RSS
2058 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2059 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2060 			    MLX5_HASH_IP);
2061 		} else
2062 #endif
2063 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2064 		    MLX5_HASH_ALL);
2065 		break;
2066 
2067 	case MLX5E_TT_IPV4_UDP:
2068 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2069 		    MLX5_L3_PROT_TYPE_IPV4);
2070 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2071 		    MLX5_L4_PROT_TYPE_UDP);
2072 #ifdef RSS
2073 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2074 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2075 			    MLX5_HASH_IP);
2076 		} else
2077 #endif
2078 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2079 		    MLX5_HASH_ALL);
2080 		break;
2081 
2082 	case MLX5E_TT_IPV6_UDP:
2083 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2084 		    MLX5_L3_PROT_TYPE_IPV6);
2085 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2086 		    MLX5_L4_PROT_TYPE_UDP);
2087 #ifdef RSS
2088 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2089 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2090 			    MLX5_HASH_IP);
2091 		} else
2092 #endif
2093 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2094 		    MLX5_HASH_ALL);
2095 		break;
2096 
2097 	case MLX5E_TT_IPV4_IPSEC_AH:
2098 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2099 		    MLX5_L3_PROT_TYPE_IPV4);
2100 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2101 		    MLX5_HASH_IP_IPSEC_SPI);
2102 		break;
2103 
2104 	case MLX5E_TT_IPV6_IPSEC_AH:
2105 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2106 		    MLX5_L3_PROT_TYPE_IPV6);
2107 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2108 		    MLX5_HASH_IP_IPSEC_SPI);
2109 		break;
2110 
2111 	case MLX5E_TT_IPV4_IPSEC_ESP:
2112 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2113 		    MLX5_L3_PROT_TYPE_IPV4);
2114 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2115 		    MLX5_HASH_IP_IPSEC_SPI);
2116 		break;
2117 
2118 	case MLX5E_TT_IPV6_IPSEC_ESP:
2119 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2120 		    MLX5_L3_PROT_TYPE_IPV6);
2121 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2122 		    MLX5_HASH_IP_IPSEC_SPI);
2123 		break;
2124 
2125 	case MLX5E_TT_IPV4:
2126 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2127 		    MLX5_L3_PROT_TYPE_IPV4);
2128 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2129 		    MLX5_HASH_IP);
2130 		break;
2131 
2132 	case MLX5E_TT_IPV6:
2133 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2134 		    MLX5_L3_PROT_TYPE_IPV6);
2135 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2136 		    MLX5_HASH_IP);
2137 		break;
2138 
2139 	default:
2140 		break;
2141 	}
2142 }
2143 
2144 static int
2145 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2146 {
2147 	struct mlx5_core_dev *mdev = priv->mdev;
2148 	u32 *in;
2149 	void *tirc;
2150 	int inlen;
2151 	int err;
2152 
2153 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2154 	in = mlx5_vzalloc(inlen);
2155 	if (in == NULL)
2156 		return (-ENOMEM);
2157 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2158 
2159 	mlx5e_build_tir_ctx(priv, tirc, tt);
2160 
2161 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2162 
2163 	kvfree(in);
2164 
2165 	return (err);
2166 }
2167 
2168 static void
2169 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2170 {
2171 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2172 }
2173 
2174 static int
2175 mlx5e_open_tirs(struct mlx5e_priv *priv)
2176 {
2177 	int err;
2178 	int i;
2179 
2180 	for (i = 0; i < MLX5E_NUM_TT; i++) {
2181 		err = mlx5e_open_tir(priv, i);
2182 		if (err)
2183 			goto err_close_tirs;
2184 	}
2185 
2186 	return (0);
2187 
2188 err_close_tirs:
2189 	for (i--; i >= 0; i--)
2190 		mlx5e_close_tir(priv, i);
2191 
2192 	return (err);
2193 }
2194 
2195 static void
2196 mlx5e_close_tirs(struct mlx5e_priv *priv)
2197 {
2198 	int i;
2199 
2200 	for (i = 0; i < MLX5E_NUM_TT; i++)
2201 		mlx5e_close_tir(priv, i);
2202 }
2203 
2204 /*
2205  * SW MTU does not include headers,
2206  * HW MTU includes all headers and checksums.
2207  */
2208 static int
2209 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2210 {
2211 	struct mlx5e_priv *priv = ifp->if_softc;
2212 	struct mlx5_core_dev *mdev = priv->mdev;
2213 	int hw_mtu;
2214 	int err;
2215 
2216 
2217 	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
2218 	if (err) {
2219 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2220 		    __func__, sw_mtu, err);
2221 		return (err);
2222 	}
2223 	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2224 	if (!err) {
2225 		ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);
2226 
2227 		if (ifp->if_mtu != sw_mtu) {
2228 			if_printf(ifp, "Port MTU %d is different than "
2229 			    "ifp mtu %d\n", sw_mtu, (int)ifp->if_mtu);
2230 		}
2231 	} else {
2232 		if_printf(ifp, "Query port MTU, after setting new "
2233 		    "MTU value, failed\n");
2234 		ifp->if_mtu = sw_mtu;
2235 	}
2236 	return (0);
2237 }
2238 
2239 int
2240 mlx5e_open_locked(struct ifnet *ifp)
2241 {
2242 	struct mlx5e_priv *priv = ifp->if_softc;
2243 	int err;
2244 
2245 	/* check if already opened */
2246 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2247 		return (0);
2248 
2249 #ifdef RSS
2250 	if (rss_getnumbuckets() > priv->params.num_channels) {
2251 		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
2252 		    "channels(%u) available\n", rss_getnumbuckets(),
2253 		    priv->params.num_channels);
2254 	}
2255 #endif
2256 	err = mlx5e_open_tises(priv);
2257 	if (err) {
2258 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2259 		    __func__, err);
2260 		return (err);
2261 	}
2262 	err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
2263 	if (err) {
2264 		if_printf(priv->ifp,
2265 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2266 		    __func__, err);
2267 		goto err_close_tises;
2268 	}
2269 	err = mlx5e_open_channels(priv);
2270 	if (err) {
2271 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2272 		    __func__, err);
2273 		goto err_dalloc_q_counter;
2274 	}
2275 	err = mlx5e_open_rqt(priv);
2276 	if (err) {
2277 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2278 		    __func__, err);
2279 		goto err_close_channels;
2280 	}
2281 	err = mlx5e_open_tirs(priv);
2282 	if (err) {
2283 		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
2284 		    __func__, err);
2285 		goto err_close_rqls;
2286 	}
2287 	err = mlx5e_open_flow_table(priv);
2288 	if (err) {
2289 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2290 		    __func__, err);
2291 		goto err_close_tirs;
2292 	}
2293 	err = mlx5e_add_all_vlan_rules(priv);
2294 	if (err) {
2295 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2296 		    __func__, err);
2297 		goto err_close_flow_table;
2298 	}
2299 	set_bit(MLX5E_STATE_OPENED, &priv->state);
2300 
2301 	mlx5e_update_carrier(priv);
2302 	mlx5e_set_rx_mode_core(priv);
2303 
2304 	return (0);
2305 
2306 err_close_flow_table:
2307 	mlx5e_close_flow_table(priv);
2308 
2309 err_close_tirs:
2310 	mlx5e_close_tirs(priv);
2311 
2312 err_close_rqls:
2313 	mlx5e_close_rqt(priv);
2314 
2315 err_close_channels:
2316 	mlx5e_close_channels(priv);
2317 
2318 err_dalloc_q_counter:
2319 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2320 
2321 err_close_tises:
2322 	mlx5e_close_tises(priv);
2323 
2324 	return (err);
2325 }
2326 
2327 static void
2328 mlx5e_open(void *arg)
2329 {
2330 	struct mlx5e_priv *priv = arg;
2331 
2332 	PRIV_LOCK(priv);
2333 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2334 		if_printf(priv->ifp,
2335 		    "%s: Setting port status to up failed\n",
2336 		    __func__);
2337 
2338 	mlx5e_open_locked(priv->ifp);
2339 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2340 	PRIV_UNLOCK(priv);
2341 }
2342 
2343 int
2344 mlx5e_close_locked(struct ifnet *ifp)
2345 {
2346 	struct mlx5e_priv *priv = ifp->if_softc;
2347 
2348 	/* check if already closed */
2349 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2350 		return (0);
2351 
2352 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2353 
2354 	mlx5e_set_rx_mode_core(priv);
2355 	mlx5e_del_all_vlan_rules(priv);
2356 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2357 	mlx5e_close_flow_table(priv);
2358 	mlx5e_close_tirs(priv);
2359 	mlx5e_close_rqt(priv);
2360 	mlx5e_close_channels(priv);
2361 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2362 	mlx5e_close_tises(priv);
2363 
2364 	return (0);
2365 }
2366 
#if (__FreeBSD_version >= 1100000)
/*
 * if_get_counter callback: map an ifnet counter ID onto the cached
 * vport statistics.  Counter IDs without a vport equivalent are
 * forwarded to if_get_counter_default().
 *
 * The private lock is not taken here; the original code marks
 * PRIV_LOCK() as "not allowed" in this context.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (priv->stats.vport.rx_packets);
	case IFCOUNTER_IERRORS:
		return (priv->stats.vport.rx_error_packets);
	case IFCOUNTER_IQDROPS:
		return (priv->stats.vport.rx_out_of_buffer);
	case IFCOUNTER_OPACKETS:
		return (priv->stats.vport.tx_packets);
	case IFCOUNTER_OERRORS:
		return (priv->stats.vport.tx_error_packets);
	case IFCOUNTER_IBYTES:
		return (priv->stats.vport.rx_bytes);
	case IFCOUNTER_OBYTES:
		return (priv->stats.vport.tx_bytes);
	case IFCOUNTER_IMCASTS:
		return (priv->stats.vport.rx_multicast_packets);
	case IFCOUNTER_OMCASTS:
		return (priv->stats.vport.tx_multicast_packets);
	case IFCOUNTER_OQDROPS:
		return (priv->stats.vport.tx_queue_dropped);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif
2414 
2415 static void
2416 mlx5e_set_rx_mode(struct ifnet *ifp)
2417 {
2418 	struct mlx5e_priv *priv = ifp->if_softc;
2419 
2420 	schedule_work(&priv->set_rx_mode_work);
2421 }
2422 
2423 static int
2424 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2425 {
2426 	struct mlx5e_priv *priv;
2427 	struct ifreq *ifr;
2428 	struct ifi2creq i2c;
2429 	int error = 0;
2430 	int mask = 0;
2431 	int size_read = 0;
2432 	int module_num;
2433 	int max_mtu;
2434 	uint8_t read_addr;
2435 
2436 	priv = ifp->if_softc;
2437 
2438 	/* check if detaching */
2439 	if (priv == NULL || priv->gone != 0)
2440 		return (ENXIO);
2441 
2442 	switch (command) {
2443 	case SIOCSIFMTU:
2444 		ifr = (struct ifreq *)data;
2445 
2446 		PRIV_LOCK(priv);
2447 		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2448 
2449 		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2450 		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2451 			int was_opened;
2452 
2453 			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2454 			if (was_opened)
2455 				mlx5e_close_locked(ifp);
2456 
2457 			/* set new MTU */
2458 			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2459 
2460 			if (was_opened)
2461 				mlx5e_open_locked(ifp);
2462 		} else {
2463 			error = EINVAL;
2464 			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2465 			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2466 		}
2467 		PRIV_UNLOCK(priv);
2468 		break;
2469 	case SIOCSIFFLAGS:
2470 		if ((ifp->if_flags & IFF_UP) &&
2471 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2472 			mlx5e_set_rx_mode(ifp);
2473 			break;
2474 		}
2475 		PRIV_LOCK(priv);
2476 		if (ifp->if_flags & IFF_UP) {
2477 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2478 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2479 					mlx5e_open_locked(ifp);
2480 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2481 				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2482 			}
2483 		} else {
2484 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2485 				mlx5_set_port_status(priv->mdev,
2486 				    MLX5_PORT_DOWN);
2487 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2488 					mlx5e_close_locked(ifp);
2489 				mlx5e_update_carrier(priv);
2490 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2491 			}
2492 		}
2493 		PRIV_UNLOCK(priv);
2494 		break;
2495 	case SIOCADDMULTI:
2496 	case SIOCDELMULTI:
2497 		mlx5e_set_rx_mode(ifp);
2498 		break;
2499 	case SIOCSIFMEDIA:
2500 	case SIOCGIFMEDIA:
2501 	case SIOCGIFXMEDIA:
2502 		ifr = (struct ifreq *)data;
2503 		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2504 		break;
2505 	case SIOCSIFCAP:
2506 		ifr = (struct ifreq *)data;
2507 		PRIV_LOCK(priv);
2508 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2509 
2510 		if (mask & IFCAP_TXCSUM) {
2511 			ifp->if_capenable ^= IFCAP_TXCSUM;
2512 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2513 
2514 			if (IFCAP_TSO4 & ifp->if_capenable &&
2515 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2516 				ifp->if_capenable &= ~IFCAP_TSO4;
2517 				ifp->if_hwassist &= ~CSUM_IP_TSO;
2518 				if_printf(ifp,
2519 				    "tso4 disabled due to -txcsum.\n");
2520 			}
2521 		}
2522 		if (mask & IFCAP_TXCSUM_IPV6) {
2523 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2524 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2525 
2526 			if (IFCAP_TSO6 & ifp->if_capenable &&
2527 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2528 				ifp->if_capenable &= ~IFCAP_TSO6;
2529 				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2530 				if_printf(ifp,
2531 				    "tso6 disabled due to -txcsum6.\n");
2532 			}
2533 		}
2534 		if (mask & IFCAP_RXCSUM)
2535 			ifp->if_capenable ^= IFCAP_RXCSUM;
2536 		if (mask & IFCAP_RXCSUM_IPV6)
2537 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2538 		if (mask & IFCAP_TSO4) {
2539 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2540 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2541 				if_printf(ifp, "enable txcsum first.\n");
2542 				error = EAGAIN;
2543 				goto out;
2544 			}
2545 			ifp->if_capenable ^= IFCAP_TSO4;
2546 			ifp->if_hwassist ^= CSUM_IP_TSO;
2547 		}
2548 		if (mask & IFCAP_TSO6) {
2549 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2550 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2551 				if_printf(ifp, "enable txcsum6 first.\n");
2552 				error = EAGAIN;
2553 				goto out;
2554 			}
2555 			ifp->if_capenable ^= IFCAP_TSO6;
2556 			ifp->if_hwassist ^= CSUM_IP6_TSO;
2557 		}
2558 		if (mask & IFCAP_VLAN_HWFILTER) {
2559 			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2560 				mlx5e_disable_vlan_filter(priv);
2561 			else
2562 				mlx5e_enable_vlan_filter(priv);
2563 
2564 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2565 		}
2566 		if (mask & IFCAP_VLAN_HWTAGGING)
2567 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2568 		if (mask & IFCAP_WOL_MAGIC)
2569 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2570 
2571 		VLAN_CAPABILITIES(ifp);
2572 		/* turn off LRO means also turn of HW LRO - if it's on */
2573 		if (mask & IFCAP_LRO) {
2574 			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2575 			bool need_restart = false;
2576 
2577 			ifp->if_capenable ^= IFCAP_LRO;
2578 			if (!(ifp->if_capenable & IFCAP_LRO)) {
2579 				if (priv->params.hw_lro_en) {
2580 					priv->params.hw_lro_en = false;
2581 					need_restart = true;
2582 					/* Not sure this is the correct way */
2583 					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2584 				}
2585 			}
2586 			if (was_opened && need_restart) {
2587 				mlx5e_close_locked(ifp);
2588 				mlx5e_open_locked(ifp);
2589 			}
2590 		}
2591 out:
2592 		PRIV_UNLOCK(priv);
2593 		break;
2594 
2595 	case SIOCGI2C:
2596 		ifr = (struct ifreq *)data;
2597 
2598 		/*
2599 		 * Copy from the user-space address ifr_data to the
2600 		 * kernel-space address i2c
2601 		 */
2602 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2603 		if (error)
2604 			break;
2605 
2606 		if (i2c.len > sizeof(i2c.data)) {
2607 			error = EINVAL;
2608 			break;
2609 		}
2610 
2611 		PRIV_LOCK(priv);
2612 		/* Get module_num which is required for the query_eeprom */
2613 		error = mlx5_query_module_num(priv->mdev, &module_num);
2614 		if (error) {
2615 			if_printf(ifp, "Query module num failed, eeprom "
2616 			    "reading is not supported\n");
2617 			error = EINVAL;
2618 			goto err_i2c;
2619 		}
2620 		/* Check if module is present before doing an access */
2621 		if (mlx5_query_module_status(priv->mdev, module_num) !=
2622 		    MLX5_MODULE_STATUS_PLUGGED) {
2623 			error = EINVAL;
2624 			goto err_i2c;
2625 		}
2626 		/*
2627 		 * Currently 0XA0 and 0xA2 are the only addresses permitted.
2628 		 * The internal conversion is as follows:
2629 		 */
2630 		if (i2c.dev_addr == 0xA0)
2631 			read_addr = MLX5E_I2C_ADDR_LOW;
2632 		else if (i2c.dev_addr == 0xA2)
2633 			read_addr = MLX5E_I2C_ADDR_HIGH;
2634 		else {
2635 			if_printf(ifp, "Query eeprom failed, "
2636 			    "Invalid Address: %X\n", i2c.dev_addr);
2637 			error = EINVAL;
2638 			goto err_i2c;
2639 		}
2640 		error = mlx5_query_eeprom(priv->mdev,
2641 		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2642 		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2643 		    (uint32_t *)i2c.data, &size_read);
2644 		if (error) {
2645 			if_printf(ifp, "Query eeprom failed, eeprom "
2646 			    "reading is not supported\n");
2647 			error = EINVAL;
2648 			goto err_i2c;
2649 		}
2650 
2651 		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2652 			error = mlx5_query_eeprom(priv->mdev,
2653 			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2654 			    (uint32_t)(i2c.offset + size_read),
2655 			    (uint32_t)(i2c.len - size_read), module_num,
2656 			    (uint32_t *)(i2c.data + size_read), &size_read);
2657 		}
2658 		if (error) {
2659 			if_printf(ifp, "Query eeprom failed, eeprom "
2660 			    "reading is not supported\n");
2661 			error = EINVAL;
2662 			goto err_i2c;
2663 		}
2664 
2665 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2666 err_i2c:
2667 		PRIV_UNLOCK(priv);
2668 		break;
2669 
2670 	default:
2671 		error = ether_ioctl(ifp, command, data);
2672 		break;
2673 	}
2674 	return (error);
2675 }
2676 
/*
 * Check that the HCA offers the capabilities this driver needs.
 * No capability is checked at the moment, so this always returns
 * zero.
 */
static int
mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
	/*
	 * TODO: uncomment once FW really sets all these bits:
	 *
	 * if (!mdev->caps.eth.rss_ind_tbl_cap ||
	 *     !mdev->caps.eth.csum_cap ||
	 *     !mdev->caps.eth.max_lso_cap ||
	 *     !mdev->caps.eth.vlan_cap ||
	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
	 *         return -ENOTSUPP;
	 */

	/* TODO: add more must-have features */

	return (0);
}
2692 
2693 static void
2694 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2695     struct mlx5e_priv *priv,
2696     int num_comp_vectors)
2697 {
2698 	/*
2699 	 * TODO: Consider link speed for setting "log_sq_size",
2700 	 * "log_rq_size" and "cq_moderation_xxx":
2701 	 */
2702 	priv->params.log_sq_size =
2703 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2704 	priv->params.log_rq_size =
2705 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2706 	priv->params.rx_cq_moderation_usec =
2707 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2708 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2709 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2710 	priv->params.rx_cq_moderation_mode =
2711 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2712 	priv->params.rx_cq_moderation_pkts =
2713 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2714 	priv->params.tx_cq_moderation_usec =
2715 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2716 	priv->params.tx_cq_moderation_pkts =
2717 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2718 	priv->params.min_rx_wqes =
2719 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
2720 	priv->params.rx_hash_log_tbl_sz =
2721 	    (order_base_2(num_comp_vectors) >
2722 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2723 	    order_base_2(num_comp_vectors) :
2724 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2725 	priv->params.num_tc = 1;
2726 	priv->params.default_vlan_prio = 0;
2727 	priv->counter_set_id = -1;
2728 
2729 	/*
2730 	 * hw lro is currently defaulted to off. when it won't anymore we
2731 	 * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
2732 	 */
2733 	priv->params.hw_lro_en = false;
2734 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2735 
2736 	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
2737 
2738 	priv->mdev = mdev;
2739 	priv->params.num_channels = num_comp_vectors;
2740 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
2741 	priv->queue_mapping_channel_mask =
2742 	    roundup_pow_of_two(num_comp_vectors) - 1;
2743 	priv->num_tc = priv->params.num_tc;
2744 	priv->default_vlan_prio = priv->params.default_vlan_prio;
2745 
2746 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2747 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2748 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2749 }
2750 
2751 static int
2752 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2753     struct mlx5_core_mr *mr)
2754 {
2755 	struct ifnet *ifp = priv->ifp;
2756 	struct mlx5_core_dev *mdev = priv->mdev;
2757 	struct mlx5_create_mkey_mbox_in *in;
2758 	int err;
2759 
2760 	in = mlx5_vzalloc(sizeof(*in));
2761 	if (in == NULL) {
2762 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2763 		return (-ENOMEM);
2764 	}
2765 	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2766 	    MLX5_PERM_LOCAL_READ |
2767 	    MLX5_ACCESS_MODE_PA;
2768 	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2769 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2770 
2771 	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2772 	    NULL);
2773 	if (err)
2774 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2775 		    __func__, err);
2776 
2777 	kvfree(in);
2778 
2779 	return (err);
2780 }
2781 
2782 static const char *mlx5e_vport_stats_desc[] = {
2783 	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
2784 };
2785 
2786 static const char *mlx5e_pport_stats_desc[] = {
2787 	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
2788 };
2789 
2790 static void
2791 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
2792 {
2793 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
2794 	sx_init(&priv->state_lock, "mlx5state");
2795 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
2796 }
2797 
2798 static void
2799 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
2800 {
2801 	mtx_destroy(&priv->async_events_mtx);
2802 	sx_destroy(&priv->state_lock);
2803 }
2804 
2805 static int
2806 sysctl_firmware(SYSCTL_HANDLER_ARGS)
2807 {
2808 	/*
2809 	 * %d.%d%.d the string format.
2810 	 * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
2811 	 * We need at most 5 chars to store that.
2812 	 * It also has: two "." and NULL at the end, which means we need 18
2813 	 * (5*3 + 3) chars at most.
2814 	 */
2815 	char fw[18];
2816 	struct mlx5e_priv *priv = arg1;
2817 	int error;
2818 
2819 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
2820 	    fw_rev_sub(priv->mdev));
2821 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
2822 	return (error);
2823 }
2824 
2825 static void
2826 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
2827 {
2828 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2829 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
2830 	    sysctl_firmware, "A", "HCA firmware version");
2831 
2832 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2833 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
2834 	    "Board ID");
2835 }
2836 
2837 static void
2838 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
2839 {
2840 #if (__FreeBSD_version < 1100000)
2841 	char path[64];
2842 
2843 #endif
2844 	/* Only receiving pauseframes is enabled by default */
2845 	priv->params.tx_pauseframe_control = 0;
2846 	priv->params.rx_pauseframe_control = 1;
2847 
2848 #if (__FreeBSD_version < 1100000)
2849 	/* compute path for sysctl */
2850 	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
2851 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
2852 
2853 	/* try to fetch tunable, if any */
2854 	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
2855 
2856 	/* compute path for sysctl */
2857 	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
2858 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
2859 
2860 	/* try to fetch tunable, if any */
2861 	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
2862 #endif
2863 
2864 	/* register pausframe SYSCTLs */
2865 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2866 	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
2867 	    &priv->params.tx_pauseframe_control, 0,
2868 	    "Set to enable TX pause frames. Clear to disable.");
2869 
2870 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2871 	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
2872 	    &priv->params.rx_pauseframe_control, 0,
2873 	    "Set to enable RX pause frames. Clear to disable.");
2874 
2875 	/* range check */
2876 	priv->params.tx_pauseframe_control =
2877 	    priv->params.tx_pauseframe_control ? 1 : 0;
2878 	priv->params.rx_pauseframe_control =
2879 	    priv->params.rx_pauseframe_control ? 1 : 0;
2880 
2881 	/* update firmware */
2882 	mlx5_set_port_pause(priv->mdev, 1,
2883 	    priv->params.rx_pauseframe_control,
2884 	    priv->params.tx_pauseframe_control);
2885 }
2886 
2887 static void *
2888 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
2889 {
2890 	static volatile int mlx5_en_unit;
2891 	struct ifnet *ifp;
2892 	struct mlx5e_priv *priv;
2893 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
2894 	struct sysctl_oid_list *child;
2895 	int ncv = mdev->priv.eq_table.num_comp_vectors;
2896 	char unit[16];
2897 	int err;
2898 	int i;
2899 	u32 eth_proto_cap;
2900 
2901 	if (mlx5e_check_required_hca_cap(mdev)) {
2902 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
2903 		return (NULL);
2904 	}
2905 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
2906 	if (priv == NULL) {
2907 		mlx5_core_err(mdev, "malloc() failed\n");
2908 		return (NULL);
2909 	}
2910 	mlx5e_priv_mtx_init(priv);
2911 
2912 	ifp = priv->ifp = if_alloc(IFT_ETHER);
2913 	if (ifp == NULL) {
2914 		mlx5_core_err(mdev, "if_alloc() failed\n");
2915 		goto err_free_priv;
2916 	}
2917 	ifp->if_softc = priv;
2918 	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
2919 	ifp->if_mtu = ETHERMTU;
2920 	ifp->if_init = mlx5e_open;
2921 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2922 	ifp->if_ioctl = mlx5e_ioctl;
2923 	ifp->if_transmit = mlx5e_xmit;
2924 	ifp->if_qflush = if_qflush;
2925 #if (__FreeBSD_version >= 1100000)
2926 	ifp->if_get_counter = mlx5e_get_counter;
2927 #endif
2928 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
2929 	/*
2930          * Set driver features
2931          */
2932 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
2933 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
2934 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
2935 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
2936 	ifp->if_capabilities |= IFCAP_LRO;
2937 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
2938 
2939 	/* set TSO limits so that we don't have to drop TX packets */
2940 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2941 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
2942 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
2943 
2944 	ifp->if_capenable = ifp->if_capabilities;
2945 	ifp->if_hwassist = 0;
2946 	if (ifp->if_capenable & IFCAP_TSO)
2947 		ifp->if_hwassist |= CSUM_TSO;
2948 	if (ifp->if_capenable & IFCAP_TXCSUM)
2949 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2950 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2951 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2952 
2953 	/* ifnet sysctl tree */
2954 	sysctl_ctx_init(&priv->sysctl_ctx);
2955 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
2956 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
2957 	if (priv->sysctl_ifnet == NULL) {
2958 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2959 		goto err_free_sysctl;
2960 	}
2961 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
2962 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2963 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
2964 	if (priv->sysctl_ifnet == NULL) {
2965 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2966 		goto err_free_sysctl;
2967 	}
2968 
2969 	/* HW sysctl tree */
2970 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
2971 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
2972 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
2973 	if (priv->sysctl_hw == NULL) {
2974 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2975 		goto err_free_sysctl;
2976 	}
2977 	mlx5e_build_ifp_priv(mdev, priv, ncv);
2978 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
2979 	if (err) {
2980 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
2981 		    __func__, err);
2982 		goto err_free_sysctl;
2983 	}
2984 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
2985 	if (err) {
2986 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
2987 		    __func__, err);
2988 		goto err_unmap_free_uar;
2989 	}
2990 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
2991 	if (err) {
2992 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
2993 		    __func__, err);
2994 		goto err_dealloc_pd;
2995 	}
2996 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
2997 	if (err) {
2998 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
2999 		    __func__, err);
3000 		goto err_dealloc_transport_domain;
3001 	}
3002 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3003 
3004 	/* check if we should generate a random MAC address */
3005 	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3006 	    is_zero_ether_addr(dev_addr)) {
3007 		random_ether_addr(dev_addr);
3008 		if_printf(ifp, "Assigned random MAC address\n");
3009 	}
3010 
3011 	/* set default MTU */
3012 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3013 
3014 	/* Set desc */
3015 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
3016 
3017 	/* Set default media status */
3018 	priv->media_status_last = IFM_AVALID;
3019 	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3020 	    IFM_ETH_RXPAUSE | IFM_FDX;
3021 
3022 	/* setup default pauseframes configuration */
3023 	mlx5e_setup_pauseframes(priv);
3024 
3025 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3026 	if (err) {
3027 		eth_proto_cap = 0;
3028 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3029 		    __func__, err);
3030 	}
3031 
3032 	/* Setup supported medias */
3033 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3034 	    mlx5e_media_change, mlx5e_media_status);
3035 
3036 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3037 		if (mlx5e_mode_table[i].baudrate == 0)
3038 			continue;
3039 		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3040 			ifmedia_add(&priv->media,
3041 			    mlx5e_mode_table[i].subtype |
3042 			    IFM_ETHER, 0, NULL);
3043 			ifmedia_add(&priv->media,
3044 			    mlx5e_mode_table[i].subtype |
3045 			    IFM_ETHER | IFM_FDX |
3046 			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3047 		}
3048 	}
3049 
3050 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3051 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3052 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3053 
3054 	/* Set autoselect by default */
3055 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3056 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3057 	ether_ifattach(ifp, dev_addr);
3058 
3059 	/* Register for VLAN events */
3060 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3061 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3062 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3063 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3064 
3065 	/* Link is down by default */
3066 	if_link_state_change(ifp, LINK_STATE_DOWN);
3067 
3068 	mlx5e_enable_async_events(priv);
3069 
3070 	mlx5e_add_hw_stats(priv);
3071 
3072 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3073 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3074 	    priv->stats.vport.arg);
3075 
3076 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3077 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3078 	    priv->stats.pport.arg);
3079 
3080 	mlx5e_create_ethtool(priv);
3081 
3082 	mtx_lock(&priv->async_events_mtx);
3083 	mlx5e_update_stats(priv);
3084 	mtx_unlock(&priv->async_events_mtx);
3085 
3086 	return (priv);
3087 
3088 err_dealloc_transport_domain:
3089 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3090 
3091 err_dealloc_pd:
3092 	mlx5_core_dealloc_pd(mdev, priv->pdn);
3093 
3094 err_unmap_free_uar:
3095 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3096 
3097 err_free_sysctl:
3098 	sysctl_ctx_free(&priv->sysctl_ctx);
3099 
3100 	if_free(ifp);
3101 
3102 err_free_priv:
3103 	mlx5e_priv_mtx_destroy(priv);
3104 	free(priv, M_MLX5EN);
3105 	return (NULL);
3106 }
3107 
3108 static void
3109 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3110 {
3111 	struct mlx5e_priv *priv = vpriv;
3112 	struct ifnet *ifp = priv->ifp;
3113 
3114 	/* don't allow more IOCTLs */
3115 	priv->gone = 1;
3116 
3117 	/* XXX wait a bit to allow IOCTL handlers to complete */
3118 	pause("W", hz);
3119 
3120 	/* stop watchdog timer */
3121 	callout_drain(&priv->watchdog);
3122 
3123 	if (priv->vlan_attach != NULL)
3124 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3125 	if (priv->vlan_detach != NULL)
3126 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3127 
3128 	/* make sure device gets closed */
3129 	PRIV_LOCK(priv);
3130 	mlx5e_close_locked(ifp);
3131 	PRIV_UNLOCK(priv);
3132 
3133 	/* unregister device */
3134 	ifmedia_removeall(&priv->media);
3135 	ether_ifdetach(ifp);
3136 	if_free(ifp);
3137 
3138 	/* destroy all remaining sysctl nodes */
3139 	if (priv->sysctl_debug)
3140 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3141 	sysctl_ctx_free(&priv->stats.vport.ctx);
3142 	sysctl_ctx_free(&priv->stats.pport.ctx);
3143 	sysctl_ctx_free(&priv->sysctl_ctx);
3144 
3145 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3146 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3147 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3148 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3149 	mlx5e_disable_async_events(priv);
3150 	flush_scheduled_work();
3151 	mlx5e_priv_mtx_destroy(priv);
3152 	free(priv, M_MLX5EN);
3153 }
3154 
3155 static void *
3156 mlx5e_get_ifp(void *vpriv)
3157 {
3158 	struct mlx5e_priv *priv = vpriv;
3159 
3160 	return (priv->ifp);
3161 }
3162 
3163 static struct mlx5_interface mlx5e_interface = {
3164 	.add = mlx5e_create_ifp,
3165 	.remove = mlx5e_destroy_ifp,
3166 	.event = mlx5e_async_event,
3167 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3168 	.get_dev = mlx5e_get_ifp,
3169 };
3170 
3171 void
3172 mlx5e_init(void)
3173 {
3174 	mlx5_register_interface(&mlx5e_interface);
3175 }
3176 
3177 void
3178 mlx5e_cleanup(void)
3179 {
3180 	mlx5_unregister_interface(&mlx5e_interface);
3181 }
3182 
3183 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3184 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3185 
3186 #if (__FreeBSD_version >= 1100000)
3187 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3188 #endif
3189 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3190 MODULE_VERSION(mlx5en, 1);
3191