xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision 8ef24a0d4b28fe230e20637f56869cc4148cd2ca)
1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "en.h"
29 
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32 
/* Driver release string; embedded in the banner below. */
#define	ETH_DRIVER_VERSION	"3.1.0-dev"

/* Human readable driver banner, including the release string. */
char mlx5e_version[] =
    "Mellanox Ethernet driver (" ETH_DRIVER_VERSION ")";
36 
37 struct mlx5e_rq_param {
38 	u32	rqc [MLX5_ST_SZ_DW(rqc)];
39 	struct mlx5_wq_param wq;
40 };
41 
42 struct mlx5e_sq_param {
43 	u32	sqc [MLX5_ST_SZ_DW(sqc)];
44 	struct mlx5_wq_param wq;
45 };
46 
47 struct mlx5e_cq_param {
48 	u32	cqc [MLX5_ST_SZ_DW(cqc)];
49 	struct mlx5_wq_param wq;
50 	u16	eq_ix;
51 };
52 
53 struct mlx5e_channel_param {
54 	struct mlx5e_rq_param rq;
55 	struct mlx5e_sq_param sq;
56 	struct mlx5e_cq_param rx_cq;
57 	struct mlx5e_cq_param tx_cq;
58 };
59 
60 static const struct {
61 	u32	subtype;
62 	u64	baudrate;
63 }	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
64 
65 	[MLX5E_1000BASE_CX_SGMII] = {
66 		.subtype = IFM_1000_CX_SGMII,
67 		.baudrate = IF_Mbps(1000ULL),
68 	},
69 	[MLX5E_1000BASE_KX] = {
70 		.subtype = IFM_1000_KX,
71 		.baudrate = IF_Mbps(1000ULL),
72 	},
73 	[MLX5E_10GBASE_CX4] = {
74 		.subtype = IFM_10G_CX4,
75 		.baudrate = IF_Gbps(10ULL),
76 	},
77 	[MLX5E_10GBASE_KX4] = {
78 		.subtype = IFM_10G_KX4,
79 		.baudrate = IF_Gbps(10ULL),
80 	},
81 	[MLX5E_10GBASE_KR] = {
82 		.subtype = IFM_10G_KR,
83 		.baudrate = IF_Gbps(10ULL),
84 	},
85 	[MLX5E_20GBASE_KR2] = {
86 		.subtype = IFM_20G_KR2,
87 		.baudrate = IF_Gbps(20ULL),
88 	},
89 	[MLX5E_40GBASE_CR4] = {
90 		.subtype = IFM_40G_CR4,
91 		.baudrate = IF_Gbps(40ULL),
92 	},
93 	[MLX5E_40GBASE_KR4] = {
94 		.subtype = IFM_40G_KR4,
95 		.baudrate = IF_Gbps(40ULL),
96 	},
97 	[MLX5E_56GBASE_R4] = {
98 		.subtype = IFM_56G_R4,
99 		.baudrate = IF_Gbps(56ULL),
100 	},
101 	[MLX5E_10GBASE_CR] = {
102 		.subtype = IFM_10G_CR1,
103 		.baudrate = IF_Gbps(10ULL),
104 	},
105 	[MLX5E_10GBASE_SR] = {
106 		.subtype = IFM_10G_SR,
107 		.baudrate = IF_Gbps(10ULL),
108 	},
109 	[MLX5E_10GBASE_LR] = {
110 		.subtype = IFM_10G_LR,
111 		.baudrate = IF_Gbps(10ULL),
112 	},
113 	[MLX5E_40GBASE_SR4] = {
114 		.subtype = IFM_40G_SR4,
115 		.baudrate = IF_Gbps(40ULL),
116 	},
117 	[MLX5E_40GBASE_LR4] = {
118 		.subtype = IFM_40G_LR4,
119 		.baudrate = IF_Gbps(40ULL),
120 	},
121 	[MLX5E_100GBASE_CR4] = {
122 		.subtype = IFM_100G_CR4,
123 		.baudrate = IF_Gbps(100ULL),
124 	},
125 	[MLX5E_100GBASE_SR4] = {
126 		.subtype = IFM_100G_SR4,
127 		.baudrate = IF_Gbps(100ULL),
128 	},
129 	[MLX5E_100GBASE_KR4] = {
130 		.subtype = IFM_100G_KR4,
131 		.baudrate = IF_Gbps(100ULL),
132 	},
133 	[MLX5E_100GBASE_LR4] = {
134 		.subtype = IFM_100G_LR4,
135 		.baudrate = IF_Gbps(100ULL),
136 	},
137 	[MLX5E_100BASE_TX] = {
138 		.subtype = IFM_100_TX,
139 		.baudrate = IF_Mbps(100ULL),
140 	},
141 	[MLX5E_100BASE_T] = {
142 		.subtype = IFM_100_T,
143 		.baudrate = IF_Mbps(100ULL),
144 	},
145 	[MLX5E_10GBASE_T] = {
146 		.subtype = IFM_10G_T,
147 		.baudrate = IF_Gbps(10ULL),
148 	},
149 	[MLX5E_25GBASE_CR] = {
150 		.subtype = IFM_25G_CR,
151 		.baudrate = IF_Gbps(25ULL),
152 	},
153 	[MLX5E_25GBASE_KR] = {
154 		.subtype = IFM_25G_KR,
155 		.baudrate = IF_Gbps(25ULL),
156 	},
157 	[MLX5E_25GBASE_SR] = {
158 		.subtype = IFM_25G_SR,
159 		.baudrate = IF_Gbps(25ULL),
160 	},
161 	[MLX5E_50GBASE_CR2] = {
162 		.subtype = IFM_50G_CR2,
163 		.baudrate = IF_Gbps(50ULL),
164 	},
165 	[MLX5E_50GBASE_KR2] = {
166 		.subtype = IFM_50G_KR2,
167 		.baudrate = IF_Gbps(50ULL),
168 	},
169 };
170 
171 MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
172 
173 static void
174 mlx5e_update_carrier(struct mlx5e_priv *priv)
175 {
176 	struct mlx5_core_dev *mdev = priv->mdev;
177 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
178 	u32 eth_proto_oper;
179 	int error;
180 	u8 port_state;
181 	u8 i;
182 
183 	port_state = mlx5_query_vport_state(mdev,
184 	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT);
185 
186 	if (port_state == VPORT_STATE_UP) {
187 		priv->media_status_last |= IFM_ACTIVE;
188 	} else {
189 		priv->media_status_last &= ~IFM_ACTIVE;
190 		priv->media_active_last = IFM_ETHER;
191 		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
192 		return;
193 	}
194 
195 	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN);
196 	if (error) {
197 		priv->media_active_last = IFM_ETHER;
198 		priv->ifp->if_baudrate = 1;
199 		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
200 		    __func__, error);
201 		return;
202 	}
203 	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
204 
205 	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
206 		if (mlx5e_mode_table[i].baudrate == 0)
207 			continue;
208 		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
209 			priv->ifp->if_baudrate =
210 			    mlx5e_mode_table[i].baudrate;
211 			priv->media_active_last =
212 			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
213 		}
214 	}
215 	if_link_state_change(priv->ifp, LINK_STATE_UP);
216 }
217 
218 static void
219 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
220 {
221 	struct mlx5e_priv *priv = dev->if_softc;
222 
223 	ifmr->ifm_status = priv->media_status_last;
224 	ifmr->ifm_active = priv->media_active_last |
225 	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
226 	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
227 
228 }
229 
230 static u32
231 mlx5e_find_link_mode(u32 subtype)
232 {
233 	u32 i;
234 	u32 link_mode = 0;
235 
236 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
237 		if (mlx5e_mode_table[i].baudrate == 0)
238 			continue;
239 		if (mlx5e_mode_table[i].subtype == subtype)
240 			link_mode |= MLX5E_PROT_MASK(i);
241 	}
242 
243 	return (link_mode);
244 }
245 
246 static int
247 mlx5e_media_change(struct ifnet *dev)
248 {
249 	struct mlx5e_priv *priv = dev->if_softc;
250 	struct mlx5_core_dev *mdev = priv->mdev;
251 	u32 eth_proto_cap;
252 	u32 link_mode;
253 	int was_opened;
254 	int locked;
255 	int error;
256 
257 	locked = PRIV_LOCKED(priv);
258 	if (!locked)
259 		PRIV_LOCK(priv);
260 
261 	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
262 		error = EINVAL;
263 		goto done;
264 	}
265 	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
266 
267 	/* query supported capabilities */
268 	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
269 	if (error != 0) {
270 		if_printf(dev, "Query port media capability failed\n");
271 		goto done;
272 	}
273 	/* check for autoselect */
274 	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
275 		link_mode = eth_proto_cap;
276 		if (link_mode == 0) {
277 			if_printf(dev, "Port media capability is zero\n");
278 			error = EINVAL;
279 			goto done;
280 		}
281 	} else {
282 		link_mode = link_mode & eth_proto_cap;
283 		if (link_mode == 0) {
284 			if_printf(dev, "Not supported link mode requested\n");
285 			error = EINVAL;
286 			goto done;
287 		}
288 	}
289 	/* update pauseframe control bits */
290 	priv->params.rx_pauseframe_control =
291 	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
292 	priv->params.tx_pauseframe_control =
293 	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
294 
295 	/* check if device is opened */
296 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
297 
298 	/* reconfigure the hardware */
299 	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
300 	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
301 	mlx5_set_port_pause(mdev, 1,
302 	    priv->params.rx_pauseframe_control,
303 	    priv->params.tx_pauseframe_control);
304 	if (was_opened)
305 		mlx5_set_port_status(mdev, MLX5_PORT_UP);
306 
307 done:
308 	if (!locked)
309 		PRIV_UNLOCK(priv);
310 	return (error);
311 }
312 
313 static void
314 mlx5e_update_carrier_work(struct work_struct *work)
315 {
316 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
317 	    update_carrier_work);
318 
319 	PRIV_LOCK(priv);
320 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
321 		mlx5e_update_carrier(priv);
322 	PRIV_UNLOCK(priv);
323 }
324 
325 static void
326 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
327 {
328 	struct mlx5_core_dev *mdev = priv->mdev;
329 	struct mlx5e_pport_stats *s = &priv->stats.pport;
330 	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
331 	u32 *in;
332 	u32 *out;
333 	u64 *ptr;
334 	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
335 	unsigned x;
336 	unsigned y;
337 
338 	in = mlx5_vzalloc(sz);
339 	out = mlx5_vzalloc(sz);
340 	if (in == NULL || out == NULL)
341 		goto free_out;
342 
343 	ptr = (uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
344 
345 	MLX5_SET(ppcnt_reg, in, local_port, 1);
346 
347 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
348 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
349 	for (x = y = 0; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
350 		s->arg[y] = be64toh(ptr[x]);
351 
352 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
353 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
354 	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
355 		s->arg[y] = be64toh(ptr[x]);
356 	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
357 	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
358 		s_debug->arg[y] = be64toh(ptr[x]);
359 
360 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
361 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
362 	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
363 		s_debug->arg[y] = be64toh(ptr[x]);
364 
365 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
366 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
367 	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
368 		s_debug->arg[y] = be64toh(ptr[x]);
369 free_out:
370 	kvfree(in);
371 	kvfree(out);
372 }
373 
374 static void
375 mlx5e_update_stats_work(struct work_struct *work)
376 {
377 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
378 	    update_stats_work);
379 	struct mlx5_core_dev *mdev = priv->mdev;
380 	struct mlx5e_vport_stats *s = &priv->stats.vport;
381 	struct mlx5e_rq_stats *rq_stats;
382 	struct mlx5e_sq_stats *sq_stats;
383 	struct buf_ring *sq_br;
384 #if (__FreeBSD_version < 1100000)
385 	struct ifnet *ifp = priv->ifp;
386 #endif
387 
388 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
389 	u32 *out;
390 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
391 	u64 tso_packets = 0;
392 	u64 tso_bytes = 0;
393 	u64 tx_queue_dropped = 0;
394 	u64 tx_defragged = 0;
395 	u64 tx_offload_none = 0;
396 	u64 lro_packets = 0;
397 	u64 lro_bytes = 0;
398 	u64 sw_lro_queued = 0;
399 	u64 sw_lro_flushed = 0;
400 	u64 rx_csum_none = 0;
401 	u64 rx_wqe_err = 0;
402 	u32 rx_out_of_buffer = 0;
403 	int i;
404 	int j;
405 
406 	PRIV_LOCK(priv);
407 	out = mlx5_vzalloc(outlen);
408 	if (out == NULL)
409 		goto free_out;
410 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
411 		goto free_out;
412 
413 	/* Collect firts the SW counters and then HW for consistency */
414 	for (i = 0; i < priv->params.num_channels; i++) {
415 		struct mlx5e_rq *rq = &priv->channel[i]->rq;
416 
417 		rq_stats = &priv->channel[i]->rq.stats;
418 
419 		/* collect stats from LRO */
420 		rq_stats->sw_lro_queued = rq->lro.lro_queued;
421 		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
422 		sw_lro_queued += rq_stats->sw_lro_queued;
423 		sw_lro_flushed += rq_stats->sw_lro_flushed;
424 		lro_packets += rq_stats->lro_packets;
425 		lro_bytes += rq_stats->lro_bytes;
426 		rx_csum_none += rq_stats->csum_none;
427 		rx_wqe_err += rq_stats->wqe_err;
428 
429 		for (j = 0; j < priv->num_tc; j++) {
430 			sq_stats = &priv->channel[i]->sq[j].stats;
431 			sq_br = priv->channel[i]->sq[j].br;
432 
433 			tso_packets += sq_stats->tso_packets;
434 			tso_bytes += sq_stats->tso_bytes;
435 			tx_queue_dropped += sq_stats->dropped;
436 			tx_queue_dropped += sq_br->br_drops;
437 			tx_defragged += sq_stats->defragged;
438 			tx_offload_none += sq_stats->csum_offload_none;
439 		}
440 	}
441 
442 	/* update counters */
443 	s->tso_packets = tso_packets;
444 	s->tso_bytes = tso_bytes;
445 	s->tx_queue_dropped = tx_queue_dropped;
446 	s->tx_defragged = tx_defragged;
447 	s->lro_packets = lro_packets;
448 	s->lro_bytes = lro_bytes;
449 	s->sw_lro_queued = sw_lro_queued;
450 	s->sw_lro_flushed = sw_lro_flushed;
451 	s->rx_csum_none = rx_csum_none;
452 	s->rx_wqe_err = rx_wqe_err;
453 
454 	/* HW counters */
455 	memset(in, 0, sizeof(in));
456 
457 	MLX5_SET(query_vport_counter_in, in, opcode,
458 	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
459 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
460 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
461 
462 	memset(out, 0, outlen);
463 
464 	/* get number of out-of-buffer drops first */
465 	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
466 	    &rx_out_of_buffer))
467 		goto free_out;
468 
469 	/* accumulate difference into a 64-bit counter */
470 	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
471 	s->rx_out_of_buffer_prev = rx_out_of_buffer;
472 
473 	/* get port statistics */
474 	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
475 		goto free_out;
476 
477 #define	MLX5_GET_CTR(out, x) \
478 	MLX5_GET64(query_vport_counter_out, out, x)
479 
480 	s->rx_error_packets =
481 	    MLX5_GET_CTR(out, received_errors.packets);
482 	s->rx_error_bytes =
483 	    MLX5_GET_CTR(out, received_errors.octets);
484 	s->tx_error_packets =
485 	    MLX5_GET_CTR(out, transmit_errors.packets);
486 	s->tx_error_bytes =
487 	    MLX5_GET_CTR(out, transmit_errors.octets);
488 
489 	s->rx_unicast_packets =
490 	    MLX5_GET_CTR(out, received_eth_unicast.packets);
491 	s->rx_unicast_bytes =
492 	    MLX5_GET_CTR(out, received_eth_unicast.octets);
493 	s->tx_unicast_packets =
494 	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
495 	s->tx_unicast_bytes =
496 	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
497 
498 	s->rx_multicast_packets =
499 	    MLX5_GET_CTR(out, received_eth_multicast.packets);
500 	s->rx_multicast_bytes =
501 	    MLX5_GET_CTR(out, received_eth_multicast.octets);
502 	s->tx_multicast_packets =
503 	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
504 	s->tx_multicast_bytes =
505 	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
506 
507 	s->rx_broadcast_packets =
508 	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
509 	s->rx_broadcast_bytes =
510 	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
511 	s->tx_broadcast_packets =
512 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
513 	s->tx_broadcast_bytes =
514 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
515 
516 	s->rx_packets =
517 	    s->rx_unicast_packets +
518 	    s->rx_multicast_packets +
519 	    s->rx_broadcast_packets -
520 	    s->rx_out_of_buffer;
521 	s->rx_bytes =
522 	    s->rx_unicast_bytes +
523 	    s->rx_multicast_bytes +
524 	    s->rx_broadcast_bytes;
525 	s->tx_packets =
526 	    s->tx_unicast_packets +
527 	    s->tx_multicast_packets +
528 	    s->tx_broadcast_packets;
529 	s->tx_bytes =
530 	    s->tx_unicast_bytes +
531 	    s->tx_multicast_bytes +
532 	    s->tx_broadcast_bytes;
533 
534 	/* Update calculated offload counters */
535 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
536 	s->rx_csum_good = s->rx_packets - s->rx_csum_none;
537 
538 	/* Update per port counters */
539 	mlx5e_update_pport_counters(priv);
540 
541 #if (__FreeBSD_version < 1100000)
542 	/* no get_counters interface in fbsd 10 */
543 	ifp->if_ipackets = s->rx_packets;
544 	ifp->if_ierrors = s->rx_error_packets;
545 	ifp->if_iqdrops = s->rx_out_of_buffer;
546 	ifp->if_opackets = s->tx_packets;
547 	ifp->if_oerrors = s->tx_error_packets;
548 	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
549 	ifp->if_ibytes = s->rx_bytes;
550 	ifp->if_obytes = s->tx_bytes;
551 #endif
552 
553 free_out:
554 	kvfree(out);
555 	PRIV_UNLOCK(priv);
556 }
557 
558 static void
559 mlx5e_update_stats(void *arg)
560 {
561 	struct mlx5e_priv *priv = arg;
562 
563 	schedule_work(&priv->update_stats_work);
564 
565 	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
566 }
567 
568 static void
569 mlx5e_async_event_sub(struct mlx5e_priv *priv,
570     enum mlx5_dev_event event)
571 {
572 	switch (event) {
573 	case MLX5_DEV_EVENT_PORT_UP:
574 	case MLX5_DEV_EVENT_PORT_DOWN:
575 		schedule_work(&priv->update_carrier_work);
576 		break;
577 
578 	default:
579 		break;
580 	}
581 }
582 
583 static void
584 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
585     enum mlx5_dev_event event, unsigned long param)
586 {
587 	struct mlx5e_priv *priv = vpriv;
588 
589 	mtx_lock(&priv->async_events_mtx);
590 	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
591 		mlx5e_async_event_sub(priv, event);
592 	mtx_unlock(&priv->async_events_mtx);
593 }
594 
595 static void
596 mlx5e_enable_async_events(struct mlx5e_priv *priv)
597 {
598 	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
599 }
600 
601 static void
602 mlx5e_disable_async_events(struct mlx5e_priv *priv)
603 {
604 	mtx_lock(&priv->async_events_mtx);
605 	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
606 	mtx_unlock(&priv->async_events_mtx);
607 }
608 
609 static const char *mlx5e_rq_stats_desc[] = {
610 	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
611 };
612 
613 static int
614 mlx5e_create_rq(struct mlx5e_channel *c,
615     struct mlx5e_rq_param *param,
616     struct mlx5e_rq *rq)
617 {
618 	struct mlx5e_priv *priv = c->priv;
619 	struct mlx5_core_dev *mdev = priv->mdev;
620 	char buffer[16];
621 	void *rqc = param->rqc;
622 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
623 	int wq_sz;
624 	int err;
625 	int i;
626 
627 	/* Create DMA descriptor TAG */
628 	if ((err = -bus_dma_tag_create(
629 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
630 	    1,				/* any alignment */
631 	    0,				/* no boundary */
632 	    BUS_SPACE_MAXADDR,		/* lowaddr */
633 	    BUS_SPACE_MAXADDR,		/* highaddr */
634 	    NULL, NULL,			/* filter, filterarg */
635 	    MJUM16BYTES,		/* maxsize */
636 	    1,				/* nsegments */
637 	    MJUM16BYTES,		/* maxsegsize */
638 	    0,				/* flags */
639 	    NULL, NULL,			/* lockfunc, lockfuncarg */
640 	    &rq->dma_tag)))
641 		goto done;
642 
643 	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
644 	    &rq->wq_ctrl);
645 	if (err)
646 		goto err_free_dma_tag;
647 
648 	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
649 
650 	if (priv->params.hw_lro_en) {
651 		rq->wqe_sz = priv->params.lro_wqe_sz;
652 	} else {
653 		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
654 	}
655 	if (rq->wqe_sz > MJUM16BYTES) {
656 		err = -ENOMEM;
657 		goto err_rq_wq_destroy;
658 	} else if (rq->wqe_sz > MJUM9BYTES) {
659 		rq->wqe_sz = MJUM16BYTES;
660 	} else if (rq->wqe_sz > MJUMPAGESIZE) {
661 		rq->wqe_sz = MJUM9BYTES;
662 	} else if (rq->wqe_sz > MCLBYTES) {
663 		rq->wqe_sz = MJUMPAGESIZE;
664 	} else {
665 		rq->wqe_sz = MCLBYTES;
666 	}
667 
668 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
669 	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
670 	if (rq->mbuf == NULL) {
671 		err = -ENOMEM;
672 		goto err_rq_wq_destroy;
673 	}
674 	for (i = 0; i != wq_sz; i++) {
675 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
676 		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
677 
678 		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
679 		if (err != 0) {
680 			while (i--)
681 				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
682 			goto err_rq_mbuf_free;
683 		}
684 		wqe->data.lkey = c->mkey_be;
685 		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
686 	}
687 
688 	rq->pdev = c->pdev;
689 	rq->ifp = c->ifp;
690 	rq->channel = c;
691 	rq->ix = c->ix;
692 
693 	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
694 	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
695 	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
696 	    rq->stats.arg);
697 
698 #ifdef HAVE_TURBO_LRO
699 	if (tcp_tlro_init(&rq->lro, c->ifp, MLX5E_BUDGET_MAX) != 0)
700 		rq->lro.mbuf = NULL;
701 #else
702 	if (tcp_lro_init(&rq->lro))
703 		rq->lro.lro_cnt = 0;
704 	else
705 		rq->lro.ifp = c->ifp;
706 #endif
707 	return (0);
708 
709 err_rq_mbuf_free:
710 	free(rq->mbuf, M_MLX5EN);
711 err_rq_wq_destroy:
712 	mlx5_wq_destroy(&rq->wq_ctrl);
713 err_free_dma_tag:
714 	bus_dma_tag_destroy(rq->dma_tag);
715 done:
716 	return (err);
717 }
718 
719 static void
720 mlx5e_destroy_rq(struct mlx5e_rq *rq)
721 {
722 	int wq_sz;
723 	int i;
724 
725 	/* destroy all sysctl nodes */
726 	sysctl_ctx_free(&rq->stats.ctx);
727 
728 	/* free leftover LRO packets, if any */
729 #ifdef HAVE_TURBO_LRO
730 	tcp_tlro_free(&rq->lro);
731 #else
732 	tcp_lro_free(&rq->lro);
733 #endif
734 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
735 	for (i = 0; i != wq_sz; i++) {
736 		if (rq->mbuf[i].mbuf != NULL) {
737 			bus_dmamap_unload(rq->dma_tag,
738 			    rq->mbuf[i].dma_map);
739 			m_freem(rq->mbuf[i].mbuf);
740 		}
741 		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
742 	}
743 	free(rq->mbuf, M_MLX5EN);
744 	mlx5_wq_destroy(&rq->wq_ctrl);
745 }
746 
747 static int
748 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
749 {
750 	struct mlx5e_channel *c = rq->channel;
751 	struct mlx5e_priv *priv = c->priv;
752 	struct mlx5_core_dev *mdev = priv->mdev;
753 
754 	void *in;
755 	void *rqc;
756 	void *wq;
757 	int inlen;
758 	int err;
759 
760 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
761 	    sizeof(u64) * rq->wq_ctrl.buf.npages;
762 	in = mlx5_vzalloc(inlen);
763 	if (in == NULL)
764 		return (-ENOMEM);
765 
766 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
767 	wq = MLX5_ADDR_OF(rqc, rqc, wq);
768 
769 	memcpy(rqc, param->rqc, sizeof(param->rqc));
770 
771 	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
772 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
773 	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
774 	if (priv->counter_set_id >= 0)
775 		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
776 	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
777 	    PAGE_SHIFT);
778 	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
779 
780 	mlx5_fill_page_array(&rq->wq_ctrl.buf,
781 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
782 
783 	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
784 
785 	kvfree(in);
786 
787 	return (err);
788 }
789 
790 static int
791 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
792 {
793 	struct mlx5e_channel *c = rq->channel;
794 	struct mlx5e_priv *priv = c->priv;
795 	struct mlx5_core_dev *mdev = priv->mdev;
796 
797 	void *in;
798 	void *rqc;
799 	int inlen;
800 	int err;
801 
802 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
803 	in = mlx5_vzalloc(inlen);
804 	if (in == NULL)
805 		return (-ENOMEM);
806 
807 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
808 
809 	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
810 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
811 	MLX5_SET(rqc, rqc, state, next_state);
812 
813 	err = mlx5_core_modify_rq(mdev, in, inlen);
814 
815 	kvfree(in);
816 
817 	return (err);
818 }
819 
820 static void
821 mlx5e_disable_rq(struct mlx5e_rq *rq)
822 {
823 	struct mlx5e_channel *c = rq->channel;
824 	struct mlx5e_priv *priv = c->priv;
825 	struct mlx5_core_dev *mdev = priv->mdev;
826 
827 	mlx5_core_destroy_rq(mdev, rq->rqn);
828 }
829 
830 static int
831 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
832 {
833 	struct mlx5e_channel *c = rq->channel;
834 	struct mlx5e_priv *priv = c->priv;
835 	struct mlx5_wq_ll *wq = &rq->wq;
836 	int i;
837 
838 	for (i = 0; i < 1000; i++) {
839 		if (wq->cur_sz >= priv->params.min_rx_wqes)
840 			return (0);
841 
842 		msleep(4);
843 	}
844 	return (-ETIMEDOUT);
845 }
846 
847 static int
848 mlx5e_open_rq(struct mlx5e_channel *c,
849     struct mlx5e_rq_param *param,
850     struct mlx5e_rq *rq)
851 {
852 	int err;
853 	int i;
854 
855 	err = mlx5e_create_rq(c, param, rq);
856 	if (err)
857 		return (err);
858 
859 	err = mlx5e_enable_rq(rq, param);
860 	if (err)
861 		goto err_destroy_rq;
862 
863 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
864 	if (err)
865 		goto err_disable_rq;
866 
867 	c->rq.enabled = 1;
868 
869 	/*
870 	 * Test send queues, which will trigger
871 	 * "mlx5e_post_rx_wqes()":
872 	 */
873 	for (i = 0; i != c->num_tc; i++)
874 		mlx5e_send_nop(&c->sq[i], 1, true);
875 	return (0);
876 
877 err_disable_rq:
878 	mlx5e_disable_rq(rq);
879 err_destroy_rq:
880 	mlx5e_destroy_rq(rq);
881 
882 	return (err);
883 }
884 
885 static void
886 mlx5e_close_rq(struct mlx5e_rq *rq)
887 {
888 	rq->enabled = 0;
889 	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
890 }
891 
892 static void
893 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
894 {
895 	/* wait till RQ is empty */
896 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
897 		msleep(4);
898 		rq->cq.mcq.comp(&rq->cq.mcq);
899 	}
900 
901 	mlx5e_disable_rq(rq);
902 	mlx5e_destroy_rq(rq);
903 }
904 
905 static void
906 mlx5e_free_sq_db(struct mlx5e_sq *sq)
907 {
908 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
909 	int x;
910 
911 	for (x = 0; x != wq_sz; x++)
912 		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
913 	free(sq->mbuf, M_MLX5EN);
914 }
915 
916 static int
917 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
918 {
919 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
920 	int err;
921 	int x;
922 
923 	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
924 	if (sq->mbuf == NULL)
925 		return (-ENOMEM);
926 
927 	/* Create DMA descriptor MAPs */
928 	for (x = 0; x != wq_sz; x++) {
929 		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
930 		if (err != 0) {
931 			while (x--)
932 				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
933 			free(sq->mbuf, M_MLX5EN);
934 			return (err);
935 		}
936 	}
937 	return (0);
938 }
939 
940 static const char *mlx5e_sq_stats_desc[] = {
941 	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
942 };
943 
944 static int
945 mlx5e_create_sq(struct mlx5e_channel *c,
946     int tc,
947     struct mlx5e_sq_param *param,
948     struct mlx5e_sq *sq)
949 {
950 	struct mlx5e_priv *priv = c->priv;
951 	struct mlx5_core_dev *mdev = priv->mdev;
952 	char buffer[16];
953 
954 	void *sqc = param->sqc;
955 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
956 #ifdef RSS
957 	cpuset_t cpu_mask;
958 	int cpu_id;
959 #endif
960 	int err;
961 
962 	/* Create DMA descriptor TAG */
963 	if ((err = -bus_dma_tag_create(
964 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
965 	    1,				/* any alignment */
966 	    0,				/* no boundary */
967 	    BUS_SPACE_MAXADDR,		/* lowaddr */
968 	    BUS_SPACE_MAXADDR,		/* highaddr */
969 	    NULL, NULL,			/* filter, filterarg */
970 	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
971 	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
972 	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
973 	    0,				/* flags */
974 	    NULL, NULL,			/* lockfunc, lockfuncarg */
975 	    &sq->dma_tag)))
976 		goto done;
977 
978 	err = mlx5_alloc_map_uar(mdev, &sq->uar);
979 	if (err)
980 		goto err_free_dma_tag;
981 
982 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
983 	    &sq->wq_ctrl);
984 	if (err)
985 		goto err_unmap_free_uar;
986 
987 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
988 	sq->uar_map = sq->uar.map;
989 	sq->uar_bf_map = sq->uar.bf_map;
990 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
991 
992 	err = mlx5e_alloc_sq_db(sq);
993 	if (err)
994 		goto err_sq_wq_destroy;
995 
996 	sq->pdev = c->pdev;
997 	sq->mkey_be = c->mkey_be;
998 	sq->channel = c;
999 	sq->tc = tc;
1000 
1001 	sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
1002 	    M_WAITOK, &sq->lock);
1003 	if (sq->br == NULL) {
1004 		if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
1005 		    __func__);
1006 		err = -ENOMEM;
1007 		goto err_free_sq_db;
1008 	}
1009 
1010 	sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
1011 	    taskqueue_thread_enqueue, &sq->sq_tq);
1012 	if (sq->sq_tq == NULL) {
1013 		if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
1014 		    __func__);
1015 		err = -ENOMEM;
1016 		goto err_free_drbr;
1017 	}
1018 
1019 	TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
1020 #ifdef RSS
1021 	cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
1022 	CPU_SETOF(cpu_id, &cpu_mask);
1023 	taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
1024 	    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
1025 #else
1026 	taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
1027 	    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
1028 #endif
1029 	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1030 	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1031 	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1032 	    sq->stats.arg);
1033 
1034 	return (0);
1035 
1036 err_free_drbr:
1037 	buf_ring_free(sq->br, M_MLX5EN);
1038 err_free_sq_db:
1039 	mlx5e_free_sq_db(sq);
1040 err_sq_wq_destroy:
1041 	mlx5_wq_destroy(&sq->wq_ctrl);
1042 
1043 err_unmap_free_uar:
1044 	mlx5_unmap_free_uar(mdev, &sq->uar);
1045 
1046 err_free_dma_tag:
1047 	bus_dma_tag_destroy(sq->dma_tag);
1048 done:
1049 	return (err);
1050 }
1051 
1052 static void
1053 mlx5e_destroy_sq(struct mlx5e_sq *sq)
1054 {
1055 	struct mlx5e_channel *c = sq->channel;
1056 	struct mlx5e_priv *priv = c->priv;
1057 
1058 	/* destroy all sysctl nodes */
1059 	sysctl_ctx_free(&sq->stats.ctx);
1060 
1061 	mlx5e_free_sq_db(sq);
1062 	mlx5_wq_destroy(&sq->wq_ctrl);
1063 	mlx5_unmap_free_uar(priv->mdev, &sq->uar);
1064 	taskqueue_drain(sq->sq_tq, &sq->sq_task);
1065 	taskqueue_free(sq->sq_tq);
1066 	buf_ring_free(sq->br, M_MLX5EN);
1067 }
1068 
1069 static int
1070 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
1071 {
1072 	struct mlx5e_channel *c = sq->channel;
1073 	struct mlx5e_priv *priv = c->priv;
1074 	struct mlx5_core_dev *mdev = priv->mdev;
1075 
1076 	void *in;
1077 	void *sqc;
1078 	void *wq;
1079 	int inlen;
1080 	int err;
1081 
1082 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1083 	    sizeof(u64) * sq->wq_ctrl.buf.npages;
1084 	in = mlx5_vzalloc(inlen);
1085 	if (in == NULL)
1086 		return (-ENOMEM);
1087 
1088 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1089 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
1090 
1091 	memcpy(sqc, param->sqc, sizeof(param->sqc));
1092 
1093 	MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]);
1094 	MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn);
1095 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1096 	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1097 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1098 
1099 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1100 	MLX5_SET(wq, wq, uar_page, sq->uar.index);
1101 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1102 	    PAGE_SHIFT);
1103 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1104 
1105 	mlx5_fill_page_array(&sq->wq_ctrl.buf,
1106 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1107 
1108 	err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn);
1109 
1110 	kvfree(in);
1111 
1112 	return (err);
1113 }
1114 
1115 static int
1116 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1117 {
1118 	struct mlx5e_channel *c = sq->channel;
1119 	struct mlx5e_priv *priv = c->priv;
1120 	struct mlx5_core_dev *mdev = priv->mdev;
1121 
1122 	void *in;
1123 	void *sqc;
1124 	int inlen;
1125 	int err;
1126 
1127 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1128 	in = mlx5_vzalloc(inlen);
1129 	if (in == NULL)
1130 		return (-ENOMEM);
1131 
1132 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1133 
1134 	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1135 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1136 	MLX5_SET(sqc, sqc, state, next_state);
1137 
1138 	err = mlx5_core_modify_sq(mdev, in, inlen);
1139 
1140 	kvfree(in);
1141 
1142 	return (err);
1143 }
1144 
1145 static void
1146 mlx5e_disable_sq(struct mlx5e_sq *sq)
1147 {
1148 	struct mlx5e_channel *c = sq->channel;
1149 	struct mlx5e_priv *priv = c->priv;
1150 	struct mlx5_core_dev *mdev = priv->mdev;
1151 
1152 	mlx5_core_destroy_sq(mdev, sq->sqn);
1153 }
1154 
1155 static int
1156 mlx5e_open_sq(struct mlx5e_channel *c,
1157     int tc,
1158     struct mlx5e_sq_param *param,
1159     struct mlx5e_sq *sq)
1160 {
1161 	int err;
1162 
1163 	err = mlx5e_create_sq(c, tc, param, sq);
1164 	if (err)
1165 		return (err);
1166 
1167 	err = mlx5e_enable_sq(sq, param);
1168 	if (err)
1169 		goto err_destroy_sq;
1170 
1171 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1172 	if (err)
1173 		goto err_disable_sq;
1174 
1175 	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);
1176 
1177 	return (0);
1178 
1179 err_disable_sq:
1180 	mlx5e_disable_sq(sq);
1181 err_destroy_sq:
1182 	mlx5e_destroy_sq(sq);
1183 
1184 	return (err);
1185 }
1186 
1187 static void
1188 mlx5e_close_sq(struct mlx5e_sq *sq)
1189 {
1190 
1191 	/* ensure hw is notified of all pending wqes */
1192 	if (mlx5e_sq_has_room_for(sq, 1))
1193 		mlx5e_send_nop(sq, 1, true);
1194 
1195 	mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1196 }
1197 
1198 static void
1199 mlx5e_close_sq_wait(struct mlx5e_sq *sq)
1200 {
1201 	/* wait till SQ is empty */
1202 	while (sq->cc != sq->pc) {
1203 		msleep(4);
1204 		sq->cq.mcq.comp(&sq->cq.mcq);
1205 	}
1206 
1207 	mlx5e_disable_sq(sq);
1208 	mlx5e_destroy_sq(sq);
1209 }
1210 
1211 static int
1212 mlx5e_create_cq(struct mlx5e_channel *c,
1213     struct mlx5e_cq_param *param,
1214     struct mlx5e_cq *cq,
1215     mlx5e_cq_comp_t *comp)
1216 {
1217 	struct mlx5e_priv *priv = c->priv;
1218 	struct mlx5_core_dev *mdev = priv->mdev;
1219 	struct mlx5_core_cq *mcq = &cq->mcq;
1220 	int eqn_not_used;
1221 	int irqn;
1222 	int err;
1223 	u32 i;
1224 
1225 	param->wq.buf_numa_node = 0;
1226 	param->wq.db_numa_node = 0;
1227 	param->eq_ix = c->ix;
1228 
1229 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1230 	    &cq->wq_ctrl);
1231 	if (err)
1232 		return (err);
1233 
1234 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn);
1235 
1236 	mcq->cqe_sz = 64;
1237 	mcq->set_ci_db = cq->wq_ctrl.db.db;
1238 	mcq->arm_db = cq->wq_ctrl.db.db + 1;
1239 	*mcq->set_ci_db = 0;
1240 	*mcq->arm_db = 0;
1241 	mcq->vector = param->eq_ix;
1242 	mcq->comp = comp;
1243 	mcq->event = mlx5e_cq_error_event;
1244 	mcq->irqn = irqn;
1245 	mcq->uar = &priv->cq_uar;
1246 
1247 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1248 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1249 
1250 		cqe->op_own = 0xf1;
1251 	}
1252 
1253 	cq->channel = c;
1254 
1255 	return (0);
1256 }
1257 
1258 static void
1259 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1260 {
1261 	mlx5_wq_destroy(&cq->wq_ctrl);
1262 }
1263 
1264 static int
1265 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param,
1266     u8 moderation_mode)
1267 {
1268 	struct mlx5e_channel *c = cq->channel;
1269 	struct mlx5e_priv *priv = c->priv;
1270 	struct mlx5_core_dev *mdev = priv->mdev;
1271 	struct mlx5_core_cq *mcq = &cq->mcq;
1272 	void *in;
1273 	void *cqc;
1274 	int inlen;
1275 	int irqn_not_used;
1276 	int eqn;
1277 	int err;
1278 
1279 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1280 	    sizeof(u64) * cq->wq_ctrl.buf.npages;
1281 	in = mlx5_vzalloc(inlen);
1282 	if (in == NULL)
1283 		return (-ENOMEM);
1284 
1285 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1286 
1287 	memcpy(cqc, param->cqc, sizeof(param->cqc));
1288 
1289 	mlx5_fill_page_array(&cq->wq_ctrl.buf,
1290 	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1291 
1292 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
1293 
1294 	MLX5_SET(cqc, cqc, cq_period_mode, moderation_mode);
1295 	MLX5_SET(cqc, cqc, c_eqn, eqn);
1296 	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1297 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1298 	    PAGE_SHIFT);
1299 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1300 
1301 	err = mlx5_core_create_cq(mdev, mcq, in, inlen);
1302 
1303 	kvfree(in);
1304 
1305 	if (err)
1306 		return (err);
1307 
1308 	mlx5e_cq_arm(cq);
1309 
1310 	return (0);
1311 }
1312 
1313 static void
1314 mlx5e_disable_cq(struct mlx5e_cq *cq)
1315 {
1316 	struct mlx5e_channel *c = cq->channel;
1317 	struct mlx5e_priv *priv = c->priv;
1318 	struct mlx5_core_dev *mdev = priv->mdev;
1319 
1320 	mlx5_core_destroy_cq(mdev, &cq->mcq);
1321 }
1322 
1323 static int
1324 mlx5e_open_cq(struct mlx5e_channel *c,
1325     struct mlx5e_cq_param *param,
1326     struct mlx5e_cq *cq,
1327     mlx5e_cq_comp_t *comp,
1328     u8 moderation_mode)
1329 {
1330 	int err;
1331 
1332 	err = mlx5e_create_cq(c, param, cq, comp);
1333 	if (err)
1334 		return (err);
1335 
1336 	err = mlx5e_enable_cq(cq, param, moderation_mode);
1337 	if (err)
1338 		goto err_destroy_cq;
1339 
1340 	return (0);
1341 
1342 err_destroy_cq:
1343 	mlx5e_destroy_cq(cq);
1344 
1345 	return (err);
1346 }
1347 
/*
 * Close a completion queue: destroy the hardware object first and
 * free the software state afterwards.
 */
static void
mlx5e_close_cq(struct mlx5e_cq *cq)
{

	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}
1354 
1355 static int
1356 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1357     struct mlx5e_channel_param *cparam)
1358 {
1359 	u8 tx_moderation_mode;
1360 	int err;
1361 	int tc;
1362 
1363 	switch (c->priv->params.tx_cq_moderation_mode) {
1364 	case 0:
1365 		tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1366 		break;
1367 	default:
1368 		if (MLX5_CAP_GEN(c->priv->mdev, cq_period_start_from_cqe))
1369 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1370 		else
1371 			tx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1372 		break;
1373 	}
1374 	for (tc = 0; tc < c->num_tc; tc++) {
1375 		/* open completion queue */
1376 		err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq,
1377 		    &mlx5e_tx_cq_comp, tx_moderation_mode);
1378 		if (err)
1379 			goto err_close_tx_cqs;
1380 	}
1381 	return (0);
1382 
1383 err_close_tx_cqs:
1384 	for (tc--; tc >= 0; tc--)
1385 		mlx5e_close_cq(&c->sq[tc].cq);
1386 
1387 	return (err);
1388 }
1389 
1390 static void
1391 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1392 {
1393 	int tc;
1394 
1395 	for (tc = 0; tc < c->num_tc; tc++)
1396 		mlx5e_close_cq(&c->sq[tc].cq);
1397 }
1398 
1399 static int
1400 mlx5e_open_sqs(struct mlx5e_channel *c,
1401     struct mlx5e_channel_param *cparam)
1402 {
1403 	int err;
1404 	int tc;
1405 
1406 	for (tc = 0; tc < c->num_tc; tc++) {
1407 		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1408 		if (err)
1409 			goto err_close_sqs;
1410 	}
1411 
1412 	return (0);
1413 
1414 err_close_sqs:
1415 	for (tc--; tc >= 0; tc--) {
1416 		mlx5e_close_sq(&c->sq[tc]);
1417 		mlx5e_close_sq_wait(&c->sq[tc]);
1418 	}
1419 
1420 	return (err);
1421 }
1422 
1423 static void
1424 mlx5e_close_sqs(struct mlx5e_channel *c)
1425 {
1426 	int tc;
1427 
1428 	for (tc = 0; tc < c->num_tc; tc++)
1429 		mlx5e_close_sq(&c->sq[tc]);
1430 }
1431 
1432 static void
1433 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1434 {
1435 	int tc;
1436 
1437 	for (tc = 0; tc < c->num_tc; tc++)
1438 		mlx5e_close_sq_wait(&c->sq[tc]);
1439 }
1440 
1441 static void
1442 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1443 {
1444 	int tc;
1445 
1446 	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1447 
1448 	for (tc = 0; tc < c->num_tc; tc++) {
1449 		mtx_init(&c->sq[tc].lock, "mlx5tx", MTX_NETWORK_LOCK, MTX_DEF);
1450 		mtx_init(&c->sq[tc].comp_lock, "mlx5comp", MTX_NETWORK_LOCK,
1451 		    MTX_DEF);
1452 	}
1453 }
1454 
1455 static void
1456 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1457 {
1458 	int tc;
1459 
1460 	mtx_destroy(&c->rq.mtx);
1461 
1462 	for (tc = 0; tc < c->num_tc; tc++) {
1463 		mtx_destroy(&c->sq[tc].lock);
1464 		mtx_destroy(&c->sq[tc].comp_lock);
1465 	}
1466 }
1467 
1468 static int
1469 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1470     struct mlx5e_channel_param *cparam,
1471     struct mlx5e_channel *volatile *cp)
1472 {
1473 	struct mlx5e_channel *c;
1474 	u8 rx_moderation_mode;
1475 	int err;
1476 
1477 	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1478 	if (c == NULL)
1479 		return (-ENOMEM);
1480 
1481 	c->priv = priv;
1482 	c->ix = ix;
1483 	c->cpu = 0;
1484 	c->pdev = &priv->mdev->pdev->dev;
1485 	c->ifp = priv->ifp;
1486 	c->mkey_be = cpu_to_be32(priv->mr.key);
1487 	c->num_tc = priv->num_tc;
1488 
1489 	/* init mutexes */
1490 	mlx5e_chan_mtx_init(c);
1491 
1492 	/* open transmit completion queue */
1493 	err = mlx5e_open_tx_cqs(c, cparam);
1494 	if (err)
1495 		goto err_free;
1496 
1497 	switch (priv->params.rx_cq_moderation_mode) {
1498 	case 0:
1499 		rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1500 		break;
1501 	default:
1502 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1503 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
1504 		else
1505 			rx_moderation_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1506 		break;
1507 	}
1508 
1509 	/* open receive completion queue */
1510 	err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq,
1511 	    &mlx5e_rx_cq_comp, rx_moderation_mode);
1512 	if (err)
1513 		goto err_close_tx_cqs;
1514 
1515 	err = mlx5e_open_sqs(c, cparam);
1516 	if (err)
1517 		goto err_close_rx_cq;
1518 
1519 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1520 	if (err)
1521 		goto err_close_sqs;
1522 
1523 	/* store channel pointer */
1524 	*cp = c;
1525 
1526 	/* poll receive queue initially */
1527 	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1528 
1529 	return (0);
1530 
1531 err_close_sqs:
1532 	mlx5e_close_sqs(c);
1533 	mlx5e_close_sqs_wait(c);
1534 
1535 err_close_rx_cq:
1536 	mlx5e_close_cq(&c->rq.cq);
1537 
1538 err_close_tx_cqs:
1539 	mlx5e_close_tx_cqs(c);
1540 
1541 err_free:
1542 	/* destroy mutexes */
1543 	mlx5e_chan_mtx_destroy(c);
1544 	free(c, M_MLX5EN);
1545 	return (err);
1546 }
1547 
1548 static void
1549 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1550 {
1551 	struct mlx5e_channel *c = *pp;
1552 
1553 	/* check if channel is already closed */
1554 	if (c == NULL)
1555 		return;
1556 	mlx5e_close_rq(&c->rq);
1557 	mlx5e_close_sqs(c);
1558 }
1559 
1560 static void
1561 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1562 {
1563 	struct mlx5e_channel *c = *pp;
1564 
1565 	/* check if channel is already closed */
1566 	if (c == NULL)
1567 		return;
1568 	/* ensure channel pointer is no longer used */
1569 	*pp = NULL;
1570 
1571 	mlx5e_close_rq_wait(&c->rq);
1572 	mlx5e_close_sqs_wait(c);
1573 	mlx5e_close_cq(&c->rq.cq);
1574 	mlx5e_close_tx_cqs(c);
1575 	/* destroy mutexes */
1576 	mlx5e_chan_mtx_destroy(c);
1577 	free(c, M_MLX5EN);
1578 }
1579 
1580 static void
1581 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1582     struct mlx5e_rq_param *param)
1583 {
1584 	void *rqc = param->rqc;
1585 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1586 
1587 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1588 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1589 	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
1590 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1591 	MLX5_SET(wq, wq, pd, priv->pdn);
1592 
1593 	param->wq.buf_numa_node = 0;
1594 	param->wq.db_numa_node = 0;
1595 	param->wq.linear = 1;
1596 }
1597 
1598 static void
1599 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1600     struct mlx5e_sq_param *param)
1601 {
1602 	void *sqc = param->sqc;
1603 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1604 
1605 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1606 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1607 	MLX5_SET(wq, wq, pd, priv->pdn);
1608 
1609 	param->wq.buf_numa_node = 0;
1610 	param->wq.db_numa_node = 0;
1611 	param->wq.linear = 1;
1612 }
1613 
1614 static void
1615 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1616     struct mlx5e_cq_param *param)
1617 {
1618 	void *cqc = param->cqc;
1619 
1620 	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1621 }
1622 
1623 static void
1624 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1625     struct mlx5e_cq_param *param)
1626 {
1627 	void *cqc = param->cqc;
1628 
1629 
1630 	/*
1631 	 * TODO The sysctl to control on/off is a bool value for now, which means
1632 	 * we only support CSUM, once HASH is implemnted we'll need to address that.
1633 	 */
1634 	if (priv->params.cqe_zipping_en) {
1635 		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1636 		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1637 	}
1638 
1639 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1640 	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1641 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1642 
1643 	mlx5e_build_common_cq_param(priv, param);
1644 }
1645 
1646 static void
1647 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1648     struct mlx5e_cq_param *param)
1649 {
1650 	void *cqc = param->cqc;
1651 
1652 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1653 	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1654 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1655 
1656 	mlx5e_build_common_cq_param(priv, param);
1657 }
1658 
1659 static void
1660 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1661     struct mlx5e_channel_param *cparam)
1662 {
1663 	memset(cparam, 0, sizeof(*cparam));
1664 
1665 	mlx5e_build_rq_param(priv, &cparam->rq);
1666 	mlx5e_build_sq_param(priv, &cparam->sq);
1667 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1668 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1669 }
1670 
1671 static int
1672 mlx5e_open_channels(struct mlx5e_priv *priv)
1673 {
1674 	struct mlx5e_channel_param cparam;
1675 	void *ptr;
1676 	int err;
1677 	int i;
1678 	int j;
1679 
1680 	priv->channel = malloc(priv->params.num_channels *
1681 	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1682 	if (priv->channel == NULL)
1683 		return (-ENOMEM);
1684 
1685 	mlx5e_build_channel_param(priv, &cparam);
1686 	for (i = 0; i < priv->params.num_channels; i++) {
1687 		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1688 		if (err)
1689 			goto err_close_channels;
1690 	}
1691 
1692 	for (j = 0; j < priv->params.num_channels; j++) {
1693 		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1694 		if (err)
1695 			goto err_close_channels;
1696 	}
1697 
1698 	return (0);
1699 
1700 err_close_channels:
1701 	for (i--; i >= 0; i--) {
1702 		mlx5e_close_channel(&priv->channel[i]);
1703 		mlx5e_close_channel_wait(&priv->channel[i]);
1704 	}
1705 
1706 	/* remove "volatile" attribute from "channel" pointer */
1707 	ptr = __DECONST(void *, priv->channel);
1708 	priv->channel = NULL;
1709 
1710 	free(ptr, M_MLX5EN);
1711 
1712 	return (err);
1713 }
1714 
1715 static void
1716 mlx5e_close_channels(struct mlx5e_priv *priv)
1717 {
1718 	void *ptr;
1719 	int i;
1720 
1721 	if (priv->channel == NULL)
1722 		return;
1723 
1724 	for (i = 0; i < priv->params.num_channels; i++)
1725 		mlx5e_close_channel(&priv->channel[i]);
1726 	for (i = 0; i < priv->params.num_channels; i++)
1727 		mlx5e_close_channel_wait(&priv->channel[i]);
1728 
1729 	/* remove "volatile" attribute from "channel" pointer */
1730 	ptr = __DECONST(void *, priv->channel);
1731 	priv->channel = NULL;
1732 
1733 	free(ptr, M_MLX5EN);
1734 }
1735 
1736 static int
1737 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1738 {
1739 	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
1740 	    priv->params.tx_cq_moderation_usec,
1741 	    priv->params.tx_cq_moderation_pkts));
1742 }
1743 
1744 static int
1745 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
1746 {
1747 	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
1748 	    priv->params.rx_cq_moderation_usec,
1749 	    priv->params.rx_cq_moderation_pkts));
1750 }
1751 
1752 static int
1753 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
1754 {
1755 	int err;
1756 	int i;
1757 
1758 	if (c == NULL)
1759 		return (EINVAL);
1760 
1761 	err = mlx5e_refresh_rq_params(priv, &c->rq);
1762 	if (err)
1763 		goto done;
1764 
1765 	for (i = 0; i != c->num_tc; i++) {
1766 		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
1767 		if (err)
1768 			goto done;
1769 	}
1770 done:
1771 	return (err);
1772 }
1773 
1774 int
1775 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
1776 {
1777 	int i;
1778 
1779 	if (priv->channel == NULL)
1780 		return (EINVAL);
1781 
1782 	for (i = 0; i < priv->params.num_channels; i++) {
1783 		int err;
1784 
1785 		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
1786 		if (err)
1787 			return (err);
1788 	}
1789 	return (0);
1790 }
1791 
1792 static int
1793 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
1794 {
1795 	struct mlx5_core_dev *mdev = priv->mdev;
1796 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
1797 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
1798 
1799 	memset(in, 0, sizeof(in));
1800 
1801 	MLX5_SET(tisc, tisc, prio, tc);
1802 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
1803 
1804 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
1805 }
1806 
1807 static void
1808 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
1809 {
1810 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
1811 }
1812 
1813 static int
1814 mlx5e_open_tises(struct mlx5e_priv *priv)
1815 {
1816 	int num_tc = priv->num_tc;
1817 	int err;
1818 	int tc;
1819 
1820 	for (tc = 0; tc < num_tc; tc++) {
1821 		err = mlx5e_open_tis(priv, tc);
1822 		if (err)
1823 			goto err_close_tises;
1824 	}
1825 
1826 	return (0);
1827 
1828 err_close_tises:
1829 	for (tc--; tc >= 0; tc--)
1830 		mlx5e_close_tis(priv, tc);
1831 
1832 	return (err);
1833 }
1834 
1835 static void
1836 mlx5e_close_tises(struct mlx5e_priv *priv)
1837 {
1838 	int num_tc = priv->num_tc;
1839 	int tc;
1840 
1841 	for (tc = 0; tc < num_tc; tc++)
1842 		mlx5e_close_tis(priv, tc);
1843 }
1844 
1845 static int
1846 mlx5e_open_rqt(struct mlx5e_priv *priv)
1847 {
1848 	struct mlx5_core_dev *mdev = priv->mdev;
1849 	u32 *in;
1850 	u32 out[MLX5_ST_SZ_DW(create_rqt_out)];
1851 	void *rqtc;
1852 	int inlen;
1853 	int err;
1854 	int sz;
1855 	int i;
1856 
1857 	sz = 1 << priv->params.rx_hash_log_tbl_sz;
1858 
1859 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
1860 	in = mlx5_vzalloc(inlen);
1861 	if (in == NULL)
1862 		return (-ENOMEM);
1863 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1864 
1865 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
1866 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
1867 
1868 	for (i = 0; i < sz; i++) {
1869 		int ix;
1870 #ifdef RSS
1871 		ix = rss_get_indirection_to_bucket(i);
1872 #else
1873 		ix = i;
1874 #endif
1875 		/* ensure we don't overflow */
1876 		ix %= priv->params.num_channels;
1877 		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
1878 	}
1879 
1880 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
1881 
1882 	memset(out, 0, sizeof(out));
1883 	err = mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out));
1884 	if (!err)
1885 		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
1886 
1887 	kvfree(in);
1888 
1889 	return (err);
1890 }
1891 
1892 static void
1893 mlx5e_close_rqt(struct mlx5e_priv *priv)
1894 {
1895 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)];
1896 	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)];
1897 
1898 	memset(in, 0, sizeof(in));
1899 
1900 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
1901 	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
1902 
1903 	mlx5_cmd_exec_check_status(priv->mdev, in, sizeof(in), out,
1904 	    sizeof(out));
1905 }
1906 
1907 static void
1908 mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
1909 {
1910 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1911 	__be32 *hkey;
1912 
1913 	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
1914 
1915 #define	ROUGH_MAX_L2_L3_HDR_SZ 256
1916 
1917 #define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1918 			  MLX5_HASH_FIELD_SEL_DST_IP)
1919 
1920 #define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
1921 			  MLX5_HASH_FIELD_SEL_DST_IP   |\
1922 			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
1923 			  MLX5_HASH_FIELD_SEL_L4_DPORT)
1924 
1925 #define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
1926 				 MLX5_HASH_FIELD_SEL_DST_IP   |\
1927 				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
1928 
1929 	if (priv->params.hw_lro_en) {
1930 		MLX5_SET(tirc, tirc, lro_enable_mask,
1931 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
1932 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
1933 		MLX5_SET(tirc, tirc, lro_max_msg_sz,
1934 		    (priv->params.lro_wqe_sz -
1935 		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
1936 		/* TODO: add the option to choose timer value dynamically */
1937 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
1938 		    MLX5_CAP_ETH(priv->mdev,
1939 		    lro_timer_supported_periods[2]));
1940 	}
1941 
1942 	/* setup parameters for hashing TIR type, if any */
1943 	switch (tt) {
1944 	case MLX5E_TT_ANY:
1945 		MLX5_SET(tirc, tirc, disp_type,
1946 		    MLX5_TIRC_DISP_TYPE_DIRECT);
1947 		MLX5_SET(tirc, tirc, inline_rqn,
1948 		    priv->channel[0]->rq.rqn);
1949 		break;
1950 	default:
1951 		MLX5_SET(tirc, tirc, disp_type,
1952 		    MLX5_TIRC_DISP_TYPE_INDIRECT);
1953 		MLX5_SET(tirc, tirc, indirect_table,
1954 		    priv->rqtn);
1955 		MLX5_SET(tirc, tirc, rx_hash_fn,
1956 		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
1957 		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1958 #ifdef RSS
1959 		/*
1960 		 * The FreeBSD RSS implementation does currently not
1961 		 * support symmetric Toeplitz hashes:
1962 		 */
1963 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
1964 		rss_getkey((uint8_t *)hkey);
1965 #else
1966 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1967 		hkey[0] = cpu_to_be32(0xD181C62C);
1968 		hkey[1] = cpu_to_be32(0xF7F4DB5B);
1969 		hkey[2] = cpu_to_be32(0x1983A2FC);
1970 		hkey[3] = cpu_to_be32(0x943E1ADB);
1971 		hkey[4] = cpu_to_be32(0xD9389E6B);
1972 		hkey[5] = cpu_to_be32(0xD1039C2C);
1973 		hkey[6] = cpu_to_be32(0xA74499AD);
1974 		hkey[7] = cpu_to_be32(0x593D56D9);
1975 		hkey[8] = cpu_to_be32(0xF3253C06);
1976 		hkey[9] = cpu_to_be32(0x2ADC1FFC);
1977 #endif
1978 		break;
1979 	}
1980 
1981 	switch (tt) {
1982 	case MLX5E_TT_IPV4_TCP:
1983 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1984 		    MLX5_L3_PROT_TYPE_IPV4);
1985 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
1986 		    MLX5_L4_PROT_TYPE_TCP);
1987 #ifdef RSS
1988 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
1989 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
1990 			    MLX5_HASH_IP);
1991 		} else
1992 #endif
1993 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
1994 		    MLX5_HASH_ALL);
1995 		break;
1996 
1997 	case MLX5E_TT_IPV6_TCP:
1998 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
1999 		    MLX5_L3_PROT_TYPE_IPV6);
2000 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2001 		    MLX5_L4_PROT_TYPE_TCP);
2002 #ifdef RSS
2003 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2004 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2005 			    MLX5_HASH_IP);
2006 		} else
2007 #endif
2008 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2009 		    MLX5_HASH_ALL);
2010 		break;
2011 
2012 	case MLX5E_TT_IPV4_UDP:
2013 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2014 		    MLX5_L3_PROT_TYPE_IPV4);
2015 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2016 		    MLX5_L4_PROT_TYPE_UDP);
2017 #ifdef RSS
2018 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2019 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2020 			    MLX5_HASH_IP);
2021 		} else
2022 #endif
2023 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2024 		    MLX5_HASH_ALL);
2025 		break;
2026 
2027 	case MLX5E_TT_IPV6_UDP:
2028 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2029 		    MLX5_L3_PROT_TYPE_IPV6);
2030 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2031 		    MLX5_L4_PROT_TYPE_UDP);
2032 #ifdef RSS
2033 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2034 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2035 			    MLX5_HASH_IP);
2036 		} else
2037 #endif
2038 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2039 		    MLX5_HASH_ALL);
2040 		break;
2041 
2042 	case MLX5E_TT_IPV4_IPSEC_AH:
2043 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2044 		    MLX5_L3_PROT_TYPE_IPV4);
2045 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2046 		    MLX5_HASH_IP_IPSEC_SPI);
2047 		break;
2048 
2049 	case MLX5E_TT_IPV6_IPSEC_AH:
2050 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2051 		    MLX5_L3_PROT_TYPE_IPV6);
2052 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2053 		    MLX5_HASH_IP_IPSEC_SPI);
2054 		break;
2055 
2056 	case MLX5E_TT_IPV4_IPSEC_ESP:
2057 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2058 		    MLX5_L3_PROT_TYPE_IPV4);
2059 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2060 		    MLX5_HASH_IP_IPSEC_SPI);
2061 		break;
2062 
2063 	case MLX5E_TT_IPV6_IPSEC_ESP:
2064 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2065 		    MLX5_L3_PROT_TYPE_IPV6);
2066 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2067 		    MLX5_HASH_IP_IPSEC_SPI);
2068 		break;
2069 
2070 	case MLX5E_TT_IPV4:
2071 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2072 		    MLX5_L3_PROT_TYPE_IPV4);
2073 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2074 		    MLX5_HASH_IP);
2075 		break;
2076 
2077 	case MLX5E_TT_IPV6:
2078 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2079 		    MLX5_L3_PROT_TYPE_IPV6);
2080 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2081 		    MLX5_HASH_IP);
2082 		break;
2083 
2084 	default:
2085 		break;
2086 	}
2087 }
2088 
2089 static int
2090 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2091 {
2092 	struct mlx5_core_dev *mdev = priv->mdev;
2093 	u32 *in;
2094 	void *tirc;
2095 	int inlen;
2096 	int err;
2097 
2098 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2099 	in = mlx5_vzalloc(inlen);
2100 	if (in == NULL)
2101 		return (-ENOMEM);
2102 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2103 
2104 	mlx5e_build_tir_ctx(priv, tirc, tt);
2105 
2106 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2107 
2108 	kvfree(in);
2109 
2110 	return (err);
2111 }
2112 
2113 static void
2114 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2115 {
2116 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2117 }
2118 
2119 static int
2120 mlx5e_open_tirs(struct mlx5e_priv *priv)
2121 {
2122 	int err;
2123 	int i;
2124 
2125 	for (i = 0; i < MLX5E_NUM_TT; i++) {
2126 		err = mlx5e_open_tir(priv, i);
2127 		if (err)
2128 			goto err_close_tirs;
2129 	}
2130 
2131 	return (0);
2132 
2133 err_close_tirs:
2134 	for (i--; i >= 0; i--)
2135 		mlx5e_close_tir(priv, i);
2136 
2137 	return (err);
2138 }
2139 
2140 static void
2141 mlx5e_close_tirs(struct mlx5e_priv *priv)
2142 {
2143 	int i;
2144 
2145 	for (i = 0; i < MLX5E_NUM_TT; i++)
2146 		mlx5e_close_tir(priv, i);
2147 }
2148 
2149 /*
2150  * SW MTU does not include headers,
2151  * HW MTU includes all headers and checksums.
2152  */
2153 static int
2154 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2155 {
2156 	struct mlx5e_priv *priv = ifp->if_softc;
2157 	struct mlx5_core_dev *mdev = priv->mdev;
2158 	int hw_mtu;
2159 	int err;
2160 
2161 
2162 	err = mlx5_set_port_mtu(mdev, MLX5E_SW2HW_MTU(sw_mtu));
2163 	if (err) {
2164 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2165 		    __func__, sw_mtu, err);
2166 		return (err);
2167 	}
2168 	err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2169 	if (!err) {
2170 		ifp->if_mtu = MLX5E_HW2SW_MTU(hw_mtu);
2171 
2172 		if (ifp->if_mtu != sw_mtu) {
2173 			if_printf(ifp, "Port MTU %d is different than "
2174 			    "ifp mtu %d\n", sw_mtu, (int)ifp->if_mtu);
2175 		}
2176 	} else {
2177 		if_printf(ifp, "Query port MTU, after setting new "
2178 		    "MTU value, failed\n");
2179 		ifp->if_mtu = sw_mtu;
2180 	}
2181 	return (0);
2182 }
2183 
2184 int
2185 mlx5e_open_locked(struct ifnet *ifp)
2186 {
2187 	struct mlx5e_priv *priv = ifp->if_softc;
2188 	int err;
2189 
2190 	/* check if already opened */
2191 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2192 		return (0);
2193 
2194 #ifdef RSS
2195 	if (rss_getnumbuckets() > priv->params.num_channels) {
2196 		if_printf(ifp, "NOTE: There are more RSS buckets(%u) than "
2197 		    "channels(%u) available\n", rss_getnumbuckets(),
2198 		    priv->params.num_channels);
2199 	}
2200 #endif
2201 	err = mlx5e_open_tises(priv);
2202 	if (err) {
2203 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2204 		    __func__, err);
2205 		return (err);
2206 	}
2207 	err = mlx5_vport_alloc_q_counter(priv->mdev, &priv->counter_set_id);
2208 	if (err) {
2209 		if_printf(priv->ifp,
2210 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2211 		    __func__, err);
2212 		goto err_close_tises;
2213 	}
2214 	err = mlx5e_open_channels(priv);
2215 	if (err) {
2216 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2217 		    __func__, err);
2218 		goto err_dalloc_q_counter;
2219 	}
2220 	err = mlx5e_open_rqt(priv);
2221 	if (err) {
2222 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2223 		    __func__, err);
2224 		goto err_close_channels;
2225 	}
2226 	err = mlx5e_open_tirs(priv);
2227 	if (err) {
2228 		if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n",
2229 		    __func__, err);
2230 		goto err_close_rqls;
2231 	}
2232 	err = mlx5e_open_flow_table(priv);
2233 	if (err) {
2234 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2235 		    __func__, err);
2236 		goto err_close_tirs;
2237 	}
2238 	err = mlx5e_add_all_vlan_rules(priv);
2239 	if (err) {
2240 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2241 		    __func__, err);
2242 		goto err_close_flow_table;
2243 	}
2244 	set_bit(MLX5E_STATE_OPENED, &priv->state);
2245 
2246 	mlx5e_update_carrier(priv);
2247 	mlx5e_set_rx_mode_core(priv);
2248 
2249 	return (0);
2250 
2251 err_close_flow_table:
2252 	mlx5e_close_flow_table(priv);
2253 
2254 err_close_tirs:
2255 	mlx5e_close_tirs(priv);
2256 
2257 err_close_rqls:
2258 	mlx5e_close_rqt(priv);
2259 
2260 err_close_channels:
2261 	mlx5e_close_channels(priv);
2262 
2263 err_dalloc_q_counter:
2264 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2265 
2266 err_close_tises:
2267 	mlx5e_close_tises(priv);
2268 
2269 	return (err);
2270 }
2271 
2272 static void
2273 mlx5e_open(void *arg)
2274 {
2275 	struct mlx5e_priv *priv = arg;
2276 
2277 	PRIV_LOCK(priv);
2278 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2279 		if_printf(priv->ifp,
2280 		    "%s: Setting port status to up failed\n",
2281 		    __func__);
2282 
2283 	mlx5e_open_locked(priv->ifp);
2284 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2285 	PRIV_UNLOCK(priv);
2286 }
2287 
2288 int
2289 mlx5e_close_locked(struct ifnet *ifp)
2290 {
2291 	struct mlx5e_priv *priv = ifp->if_softc;
2292 
2293 	/* check if already closed */
2294 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2295 		return (0);
2296 
2297 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2298 
2299 	mlx5e_set_rx_mode_core(priv);
2300 	mlx5e_del_all_vlan_rules(priv);
2301 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2302 	mlx5e_close_flow_table(priv);
2303 	mlx5e_close_tirs(priv);
2304 	mlx5e_close_rqt(priv);
2305 	mlx5e_close_channels(priv);
2306 	mlx5_vport_dealloc_q_counter(priv->mdev, priv->counter_set_id);
2307 	mlx5e_close_tises(priv);
2308 
2309 	return (0);
2310 }
2311 
#if (__FreeBSD_version >= 1100000)
/*
 * ifnet "if_get_counter" method.
 *
 * Maps the generic interface counters onto the cached vport
 * statistics kept in priv->stats.vport; any counter without a mapping
 * is delegated to if_get_counter_default().
 *
 * The private state lock is deliberately not taken here (it is not
 * allowed in this context), so the cached values are read unlocked.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		return (priv->stats.vport.rx_packets);
	case IFCOUNTER_IERRORS:
		return (priv->stats.vport.rx_error_packets);
	case IFCOUNTER_IQDROPS:
		return (priv->stats.vport.rx_out_of_buffer);
	case IFCOUNTER_OPACKETS:
		return (priv->stats.vport.tx_packets);
	case IFCOUNTER_OERRORS:
		return (priv->stats.vport.tx_error_packets);
	case IFCOUNTER_IBYTES:
		return (priv->stats.vport.rx_bytes);
	case IFCOUNTER_OBYTES:
		return (priv->stats.vport.tx_bytes);
	case IFCOUNTER_IMCASTS:
		return (priv->stats.vport.rx_multicast_packets);
	case IFCOUNTER_OMCASTS:
		return (priv->stats.vport.tx_multicast_packets);
	case IFCOUNTER_OQDROPS:
		return (priv->stats.vport.tx_queue_dropped);
	default:
		return (if_get_counter_default(ifp, cnt));
	}
}
#endif
2359 
2360 static void
2361 mlx5e_set_rx_mode(struct ifnet *ifp)
2362 {
2363 	struct mlx5e_priv *priv = ifp->if_softc;
2364 
2365 	schedule_work(&priv->set_rx_mode_work);
2366 }
2367 
2368 static int
2369 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2370 {
2371 	struct mlx5e_priv *priv;
2372 	struct ifreq *ifr;
2373 	struct ifi2creq i2c;
2374 	int error = 0;
2375 	int mask = 0;
2376 	int size_read = 0;
2377 	int module_num;
2378 	int max_mtu;
2379 	uint8_t read_addr;
2380 
2381 	priv = ifp->if_softc;
2382 
2383 	/* check if detaching */
2384 	if (priv == NULL || priv->gone != 0)
2385 		return (ENXIO);
2386 
2387 	switch (command) {
2388 	case SIOCSIFMTU:
2389 		ifr = (struct ifreq *)data;
2390 
2391 		PRIV_LOCK(priv);
2392 		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2393 
2394 		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2395 		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2396 			int was_opened;
2397 
2398 			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2399 			if (was_opened)
2400 				mlx5e_close_locked(ifp);
2401 
2402 			/* set new MTU */
2403 			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2404 
2405 			if (was_opened)
2406 				mlx5e_open_locked(ifp);
2407 		} else {
2408 			error = EINVAL;
2409 			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2410 			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2411 		}
2412 		PRIV_UNLOCK(priv);
2413 		break;
2414 	case SIOCSIFFLAGS:
2415 		if ((ifp->if_flags & IFF_UP) &&
2416 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2417 			mlx5e_set_rx_mode(ifp);
2418 			break;
2419 		}
2420 		PRIV_LOCK(priv);
2421 		if (ifp->if_flags & IFF_UP) {
2422 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2423 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2424 					mlx5e_open_locked(ifp);
2425 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2426 				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2427 			}
2428 		} else {
2429 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2430 				mlx5_set_port_status(priv->mdev,
2431 				    MLX5_PORT_DOWN);
2432 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2433 					mlx5e_close_locked(ifp);
2434 				mlx5e_update_carrier(priv);
2435 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2436 			}
2437 		}
2438 		PRIV_UNLOCK(priv);
2439 		break;
2440 	case SIOCADDMULTI:
2441 	case SIOCDELMULTI:
2442 		mlx5e_set_rx_mode(ifp);
2443 		break;
2444 	case SIOCSIFMEDIA:
2445 	case SIOCGIFMEDIA:
2446 	case SIOCGIFXMEDIA:
2447 		ifr = (struct ifreq *)data;
2448 		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2449 		break;
2450 	case SIOCSIFCAP:
2451 		ifr = (struct ifreq *)data;
2452 		PRIV_LOCK(priv);
2453 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2454 
2455 		if (mask & IFCAP_TXCSUM) {
2456 			ifp->if_capenable ^= IFCAP_TXCSUM;
2457 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2458 
2459 			if (IFCAP_TSO4 & ifp->if_capenable &&
2460 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2461 				ifp->if_capenable &= ~IFCAP_TSO4;
2462 				ifp->if_hwassist &= ~CSUM_IP_TSO;
2463 				if_printf(ifp,
2464 				    "tso4 disabled due to -txcsum.\n");
2465 			}
2466 		}
2467 		if (mask & IFCAP_TXCSUM_IPV6) {
2468 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2469 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2470 
2471 			if (IFCAP_TSO6 & ifp->if_capenable &&
2472 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2473 				ifp->if_capenable &= ~IFCAP_TSO6;
2474 				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2475 				if_printf(ifp,
2476 				    "tso6 disabled due to -txcsum6.\n");
2477 			}
2478 		}
2479 		if (mask & IFCAP_RXCSUM)
2480 			ifp->if_capenable ^= IFCAP_RXCSUM;
2481 		if (mask & IFCAP_RXCSUM_IPV6)
2482 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2483 		if (mask & IFCAP_TSO4) {
2484 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2485 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2486 				if_printf(ifp, "enable txcsum first.\n");
2487 				error = EAGAIN;
2488 				goto out;
2489 			}
2490 			ifp->if_capenable ^= IFCAP_TSO4;
2491 			ifp->if_hwassist ^= CSUM_IP_TSO;
2492 		}
2493 		if (mask & IFCAP_TSO6) {
2494 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2495 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2496 				if_printf(ifp, "enable txcsum6 first.\n");
2497 				error = EAGAIN;
2498 				goto out;
2499 			}
2500 			ifp->if_capenable ^= IFCAP_TSO6;
2501 			ifp->if_hwassist ^= CSUM_IP6_TSO;
2502 		}
2503 		if (mask & IFCAP_VLAN_HWFILTER) {
2504 			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2505 				mlx5e_disable_vlan_filter(priv);
2506 			else
2507 				mlx5e_enable_vlan_filter(priv);
2508 
2509 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2510 		}
2511 		if (mask & IFCAP_VLAN_HWTAGGING)
2512 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2513 		if (mask & IFCAP_WOL_MAGIC)
2514 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2515 
2516 		VLAN_CAPABILITIES(ifp);
2517 		/* turn off LRO means also turn of HW LRO - if it's on */
2518 		if (mask & IFCAP_LRO) {
2519 			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2520 			bool need_restart = false;
2521 
2522 			ifp->if_capenable ^= IFCAP_LRO;
2523 			if (!(ifp->if_capenable & IFCAP_LRO)) {
2524 				if (priv->params.hw_lro_en) {
2525 					priv->params.hw_lro_en = false;
2526 					need_restart = true;
2527 					/* Not sure this is the correct way */
2528 					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2529 				}
2530 			}
2531 			if (was_opened && need_restart) {
2532 				mlx5e_close_locked(ifp);
2533 				mlx5e_open_locked(ifp);
2534 			}
2535 		}
2536 out:
2537 		PRIV_UNLOCK(priv);
2538 		break;
2539 
2540 	case SIOCGI2C:
2541 		ifr = (struct ifreq *)data;
2542 
2543 		/*
2544 		 * Copy from the user-space address ifr_data to the
2545 		 * kernel-space address i2c
2546 		 */
2547 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2548 		if (error)
2549 			break;
2550 
2551 		if (i2c.len > sizeof(i2c.data)) {
2552 			error = EINVAL;
2553 			break;
2554 		}
2555 
2556 		PRIV_LOCK(priv);
2557 		/* Get module_num which is required for the query_eeprom */
2558 		error = mlx5_query_module_num(priv->mdev, &module_num);
2559 		if (error) {
2560 			if_printf(ifp, "Query module num failed, eeprom "
2561 			    "reading is not supported\n");
2562 			error = EINVAL;
2563 			goto err_i2c;
2564 		}
2565 		/* Check if module is present before doing an access */
2566 		if (mlx5_query_module_status(priv->mdev, module_num) !=
2567 		    MLX5_MODULE_STATUS_PLUGGED) {
2568 			error = EINVAL;
2569 			goto err_i2c;
2570 		}
2571 		/*
2572 		 * Currently 0XA0 and 0xA2 are the only addresses permitted.
2573 		 * The internal conversion is as follows:
2574 		 */
2575 		if (i2c.dev_addr == 0xA0)
2576 			read_addr = MLX5E_I2C_ADDR_LOW;
2577 		else if (i2c.dev_addr == 0xA2)
2578 			read_addr = MLX5E_I2C_ADDR_HIGH;
2579 		else {
2580 			if_printf(ifp, "Query eeprom failed, "
2581 			    "Invalid Address: %X\n", i2c.dev_addr);
2582 			error = EINVAL;
2583 			goto err_i2c;
2584 		}
2585 		error = mlx5_query_eeprom(priv->mdev,
2586 		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2587 		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2588 		    (uint32_t *)i2c.data, &size_read);
2589 		if (error) {
2590 			if_printf(ifp, "Query eeprom failed, eeprom "
2591 			    "reading is not supported\n");
2592 			error = EINVAL;
2593 			goto err_i2c;
2594 		}
2595 
2596 		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2597 			error = mlx5_query_eeprom(priv->mdev,
2598 			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2599 			    (uint32_t)(i2c.offset + size_read),
2600 			    (uint32_t)(i2c.len - size_read), module_num,
2601 			    (uint32_t *)(i2c.data + size_read), &size_read);
2602 		}
2603 		if (error) {
2604 			if_printf(ifp, "Query eeprom failed, eeprom "
2605 			    "reading is not supported\n");
2606 			error = EINVAL;
2607 			goto err_i2c;
2608 		}
2609 
2610 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2611 err_i2c:
2612 		PRIV_UNLOCK(priv);
2613 		break;
2614 
2615 	default:
2616 		error = ether_ioctl(ifp, command, data);
2617 		break;
2618 	}
2619 	return (error);
2620 }
2621 
/*
 * Verify that the HCA offers everything this driver depends on.
 *
 * No capability is examined yet, so every device is accepted and 0 is
 * returned.
 *
 * TODO: once the firmware really sets all of these bits, return
 * -ENOTSUPP when any of the following is missing:
 *	mdev->caps.eth.rss_ind_tbl_cap
 *	mdev->caps.eth.csum_cap
 *	mdev->caps.eth.max_lso_cap
 *	mdev->caps.eth.vlan_cap
 *	MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD in mdev->caps.gen.flags
 *
 * TODO: add more must-have features.
 */
static int
mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
	/* not consulted until the checks described above exist */
	(void)mdev;

	return (0);
}
2637 
2638 static void
2639 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2640     struct mlx5e_priv *priv,
2641     int num_comp_vectors)
2642 {
2643 	/*
2644 	 * TODO: Consider link speed for setting "log_sq_size",
2645 	 * "log_rq_size" and "cq_moderation_xxx":
2646 	 */
2647 	priv->params.log_sq_size =
2648 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2649 	priv->params.log_rq_size =
2650 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2651 	priv->params.rx_cq_moderation_usec =
2652 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2653 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2654 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2655 	priv->params.rx_cq_moderation_mode =
2656 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
2657 	priv->params.rx_cq_moderation_pkts =
2658 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
2659 	priv->params.tx_cq_moderation_usec =
2660 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
2661 	priv->params.tx_cq_moderation_pkts =
2662 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
2663 	priv->params.min_rx_wqes =
2664 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
2665 	priv->params.rx_hash_log_tbl_sz =
2666 	    (order_base_2(num_comp_vectors) >
2667 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
2668 	    order_base_2(num_comp_vectors) :
2669 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
2670 	priv->params.num_tc = 1;
2671 	priv->params.default_vlan_prio = 0;
2672 	priv->counter_set_id = -1;
2673 
2674 	/*
2675 	 * hw lro is currently defaulted to off. when it won't anymore we
2676 	 * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
2677 	 */
2678 	priv->params.hw_lro_en = false;
2679 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
2680 
2681 	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
2682 
2683 	priv->mdev = mdev;
2684 	priv->params.num_channels = num_comp_vectors;
2685 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
2686 	priv->queue_mapping_channel_mask =
2687 	    roundup_pow_of_two(num_comp_vectors) - 1;
2688 	priv->num_tc = priv->params.num_tc;
2689 	priv->default_vlan_prio = priv->params.default_vlan_prio;
2690 
2691 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
2692 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
2693 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
2694 }
2695 
2696 static int
2697 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
2698     struct mlx5_core_mr *mr)
2699 {
2700 	struct ifnet *ifp = priv->ifp;
2701 	struct mlx5_core_dev *mdev = priv->mdev;
2702 	struct mlx5_create_mkey_mbox_in *in;
2703 	int err;
2704 
2705 	in = mlx5_vzalloc(sizeof(*in));
2706 	if (in == NULL) {
2707 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
2708 		return (-ENOMEM);
2709 	}
2710 	in->seg.flags = MLX5_PERM_LOCAL_WRITE |
2711 	    MLX5_PERM_LOCAL_READ |
2712 	    MLX5_ACCESS_MODE_PA;
2713 	in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
2714 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
2715 
2716 	err = mlx5_core_create_mkey(mdev, mr, in, sizeof(*in), NULL, NULL,
2717 	    NULL);
2718 	if (err)
2719 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
2720 		    __func__, err);
2721 
2722 	kvfree(in);
2723 
2724 	return (err);
2725 }
2726 
/*
 * String table for the vport statistics, produced by expanding
 * MLX5E_VPORT_STATS() with MLX5E_STATS_DESC. It is handed to
 * mlx5e_create_stats() when the "vstats" sysctl node is created.
 */
static const char *mlx5e_vport_stats_desc[] = {
	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};

/*
 * String table for the physical port statistics, produced by expanding
 * MLX5E_PPORT_STATS() with MLX5E_STATS_DESC. It is handed to
 * mlx5e_create_stats() when the "pstats" sysctl node is created.
 */
static const char *mlx5e_pport_stats_desc[] = {
	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};
2734 
2735 static void
2736 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
2737 {
2738 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
2739 	sx_init(&priv->state_lock, "mlx5state");
2740 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
2741 }
2742 
2743 static void
2744 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
2745 {
2746 	mtx_destroy(&priv->async_events_mtx);
2747 	sx_destroy(&priv->state_lock);
2748 }
2749 
2750 static int
2751 sysctl_firmware(SYSCTL_HANDLER_ARGS)
2752 {
2753 	/*
2754 	 * %d.%d%.d the string format.
2755 	 * fw_rev_{maj,min,sub} return u16, 2^16 = 65536.
2756 	 * We need at most 5 chars to store that.
2757 	 * It also has: two "." and NULL at the end, which means we need 18
2758 	 * (5*3 + 3) chars at most.
2759 	 */
2760 	char fw[18];
2761 	struct mlx5e_priv *priv = arg1;
2762 	int error;
2763 
2764 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
2765 	    fw_rev_sub(priv->mdev));
2766 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
2767 	return (error);
2768 }
2769 
2770 static void
2771 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
2772 {
2773 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2774 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
2775 	    sysctl_firmware, "A", "HCA firmware version");
2776 
2777 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
2778 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
2779 	    "Board ID");
2780 }
2781 
2782 static void
2783 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
2784 {
2785 #if (__FreeBSD_version < 1100000)
2786 	char path[64];
2787 
2788 #endif
2789 	/* Only receiving pauseframes is enabled by default */
2790 	priv->params.tx_pauseframe_control = 0;
2791 	priv->params.rx_pauseframe_control = 1;
2792 
2793 #if (__FreeBSD_version < 1100000)
2794 	/* compute path for sysctl */
2795 	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
2796 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
2797 
2798 	/* try to fetch tunable, if any */
2799 	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
2800 
2801 	/* compute path for sysctl */
2802 	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
2803 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
2804 
2805 	/* try to fetch tunable, if any */
2806 	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
2807 #endif
2808 
2809 	/* register pausframe SYSCTLs */
2810 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2811 	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
2812 	    &priv->params.tx_pauseframe_control, 0,
2813 	    "Set to enable TX pause frames. Clear to disable.");
2814 
2815 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2816 	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
2817 	    &priv->params.rx_pauseframe_control, 0,
2818 	    "Set to enable RX pause frames. Clear to disable.");
2819 
2820 	/* range check */
2821 	priv->params.tx_pauseframe_control =
2822 	    priv->params.tx_pauseframe_control ? 1 : 0;
2823 	priv->params.rx_pauseframe_control =
2824 	    priv->params.rx_pauseframe_control ? 1 : 0;
2825 
2826 	/* update firmware */
2827 	mlx5_set_port_pause(priv->mdev, 1,
2828 	    priv->params.rx_pauseframe_control,
2829 	    priv->params.tx_pauseframe_control);
2830 }
2831 
2832 static void *
2833 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
2834 {
2835 	static volatile int mlx5_en_unit;
2836 	struct ifnet *ifp;
2837 	struct mlx5e_priv *priv;
2838 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
2839 	struct sysctl_oid_list *child;
2840 	int ncv = mdev->priv.eq_table.num_comp_vectors;
2841 	char unit[16];
2842 	int err;
2843 	int i;
2844 	u32 eth_proto_cap;
2845 
2846 	if (mlx5e_check_required_hca_cap(mdev)) {
2847 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
2848 		return (NULL);
2849 	}
2850 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
2851 	if (priv == NULL) {
2852 		mlx5_core_err(mdev, "malloc() failed\n");
2853 		return (NULL);
2854 	}
2855 	mlx5e_priv_mtx_init(priv);
2856 
2857 	ifp = priv->ifp = if_alloc(IFT_ETHER);
2858 	if (ifp == NULL) {
2859 		mlx5_core_err(mdev, "if_alloc() failed\n");
2860 		goto err_free_priv;
2861 	}
2862 	ifp->if_softc = priv;
2863 	if_initname(ifp, "mce", atomic_fetchadd_int(&mlx5_en_unit, 1));
2864 	ifp->if_mtu = ETHERMTU;
2865 	ifp->if_init = mlx5e_open;
2866 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
2867 	ifp->if_ioctl = mlx5e_ioctl;
2868 	ifp->if_transmit = mlx5e_xmit;
2869 	ifp->if_qflush = if_qflush;
2870 #if (__FreeBSD_version >= 1100000)
2871 	ifp->if_get_counter = mlx5e_get_counter;
2872 #endif
2873 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
2874 	/*
2875          * Set driver features
2876          */
2877 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
2878 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
2879 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
2880 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
2881 	ifp->if_capabilities |= IFCAP_LRO;
2882 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
2883 
2884 	/* set TSO limits so that we don't have to drop TX packets */
2885 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
2886 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
2887 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
2888 
2889 	ifp->if_capenable = ifp->if_capabilities;
2890 	ifp->if_hwassist = 0;
2891 	if (ifp->if_capenable & IFCAP_TSO)
2892 		ifp->if_hwassist |= CSUM_TSO;
2893 	if (ifp->if_capenable & IFCAP_TXCSUM)
2894 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2895 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2896 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2897 
2898 	/* ifnet sysctl tree */
2899 	sysctl_ctx_init(&priv->sysctl_ctx);
2900 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
2901 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
2902 	if (priv->sysctl_ifnet == NULL) {
2903 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2904 		goto err_free_sysctl;
2905 	}
2906 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
2907 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
2908 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
2909 	if (priv->sysctl_ifnet == NULL) {
2910 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2911 		goto err_free_sysctl;
2912 	}
2913 
2914 	/* HW sysctl tree */
2915 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
2916 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
2917 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
2918 	if (priv->sysctl_hw == NULL) {
2919 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
2920 		goto err_free_sysctl;
2921 	}
2922 	mlx5e_build_ifp_priv(mdev, priv, ncv);
2923 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
2924 	if (err) {
2925 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
2926 		    __func__, err);
2927 		goto err_free_sysctl;
2928 	}
2929 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
2930 	if (err) {
2931 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
2932 		    __func__, err);
2933 		goto err_unmap_free_uar;
2934 	}
2935 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
2936 	if (err) {
2937 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
2938 		    __func__, err);
2939 		goto err_dealloc_pd;
2940 	}
2941 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
2942 	if (err) {
2943 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
2944 		    __func__, err);
2945 		goto err_dealloc_transport_domain;
2946 	}
2947 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
2948 
2949 	/* set default MTU */
2950 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
2951 
2952 	/* Set desc */
2953 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
2954 
2955 	/* Set default media status */
2956 	priv->media_status_last = IFM_AVALID;
2957 	priv->media_active_last = IFM_ETHER | IFM_AUTO |
2958 	    IFM_ETH_RXPAUSE | IFM_FDX;
2959 
2960 	/* setup default pauseframes configuration */
2961 	mlx5e_setup_pauseframes(priv);
2962 
2963 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
2964 	if (err) {
2965 		eth_proto_cap = 0;
2966 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
2967 		    __func__, err);
2968 	}
2969 
2970 	/* Setup supported medias */
2971 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
2972 	    mlx5e_media_change, mlx5e_media_status);
2973 
2974 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
2975 		if (mlx5e_mode_table[i].baudrate == 0)
2976 			continue;
2977 		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
2978 			ifmedia_add(&priv->media,
2979 			    mlx5e_mode_table[i].subtype |
2980 			    IFM_ETHER, 0, NULL);
2981 			ifmedia_add(&priv->media,
2982 			    mlx5e_mode_table[i].subtype |
2983 			    IFM_ETHER | IFM_FDX |
2984 			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
2985 		}
2986 	}
2987 
2988 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
2989 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
2990 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
2991 
2992 	/* Set autoselect by default */
2993 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
2994 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
2995 	ether_ifattach(ifp, dev_addr);
2996 
2997 	/* Register for VLAN events */
2998 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
2999 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3000 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3001 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3002 
3003 	/* Link is down by default */
3004 	if_link_state_change(ifp, LINK_STATE_DOWN);
3005 
3006 	mlx5e_enable_async_events(priv);
3007 
3008 	mlx5e_add_hw_stats(priv);
3009 
3010 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3011 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3012 	    priv->stats.vport.arg);
3013 
3014 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3015 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3016 	    priv->stats.pport.arg);
3017 
3018 	mlx5e_create_ethtool(priv);
3019 
3020 	mtx_lock(&priv->async_events_mtx);
3021 	mlx5e_update_stats(priv);
3022 	mtx_unlock(&priv->async_events_mtx);
3023 
3024 	return (priv);
3025 
3026 err_dealloc_transport_domain:
3027 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3028 
3029 err_dealloc_pd:
3030 	mlx5_core_dealloc_pd(mdev, priv->pdn);
3031 
3032 err_unmap_free_uar:
3033 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3034 
3035 err_free_sysctl:
3036 	sysctl_ctx_free(&priv->sysctl_ctx);
3037 
3038 	if_free(ifp);
3039 
3040 err_free_priv:
3041 	mlx5e_priv_mtx_destroy(priv);
3042 	free(priv, M_MLX5EN);
3043 	return (NULL);
3044 }
3045 
3046 static void
3047 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3048 {
3049 	struct mlx5e_priv *priv = vpriv;
3050 	struct ifnet *ifp = priv->ifp;
3051 
3052 	/* don't allow more IOCTLs */
3053 	priv->gone = 1;
3054 
3055 	/* XXX wait a bit to allow IOCTL handlers to complete */
3056 	pause("W", hz);
3057 
3058 	/* stop watchdog timer */
3059 	callout_drain(&priv->watchdog);
3060 
3061 	if (priv->vlan_attach != NULL)
3062 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3063 	if (priv->vlan_detach != NULL)
3064 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3065 
3066 	/* make sure device gets closed */
3067 	PRIV_LOCK(priv);
3068 	mlx5e_close_locked(ifp);
3069 	PRIV_UNLOCK(priv);
3070 
3071 	/* unregister device */
3072 	ifmedia_removeall(&priv->media);
3073 	ether_ifdetach(ifp);
3074 	if_free(ifp);
3075 
3076 	/* destroy all remaining sysctl nodes */
3077 	if (priv->sysctl_debug)
3078 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3079 	sysctl_ctx_free(&priv->stats.vport.ctx);
3080 	sysctl_ctx_free(&priv->stats.pport.ctx);
3081 	sysctl_ctx_free(&priv->sysctl_ctx);
3082 
3083 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3084 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3085 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3086 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3087 	mlx5e_disable_async_events(priv);
3088 	flush_scheduled_work();
3089 	mlx5e_priv_mtx_destroy(priv);
3090 	free(priv, M_MLX5EN);
3091 }
3092 
3093 static void *
3094 mlx5e_get_ifp(void *vpriv)
3095 {
3096 	struct mlx5e_priv *priv = vpriv;
3097 
3098 	return (priv->ifp);
3099 }
3100 
3101 static struct mlx5_interface mlx5e_interface = {
3102 	.add = mlx5e_create_ifp,
3103 	.remove = mlx5e_destroy_ifp,
3104 	.event = mlx5e_async_event,
3105 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3106 	.get_dev = mlx5e_get_ifp,
3107 };
3108 
/*
 * Module initialisation hook (see module_init_order() below):
 * registers mlx5e_interface through mlx5_register_interface(). Any
 * value returned by that call is not examined.
 */
void
mlx5e_init(void)
{
	mlx5_register_interface(&mlx5e_interface);
}
3114 
/*
 * Module exit hook (see module_exit_order() below): removes
 * mlx5e_interface again through mlx5_unregister_interface().
 */
void
mlx5e_cleanup(void)
{
	mlx5_unregister_interface(&mlx5e_interface);
}
3120 
/* Hook mlx5e_init()/mlx5e_cleanup() into module load/unload at SI_ORDER_THIRD. */
module_init_order(mlx5e_init, SI_ORDER_THIRD);
module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);

/* The linuxkpi dependency is only declared for __FreeBSD_version >= 1100000. */
#if (__FreeBSD_version >= 1100000)
MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
#endif
/* The mlx5 core module is always required; this module is version 1. */
MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
MODULE_VERSION(mlx5en, 1);
3129