xref: /freebsd/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision 52f72944b8f5abb2386eae924357dee8aea17d5b)
1 /*-
2  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27 
28 #include "en.h"
29 
30 #include <sys/sockio.h>
31 #include <machine/atomic.h>
32 
33 #define	ETH_DRIVER_VERSION	"3.1.0-dev"
34 char mlx5e_version[] = "Mellanox Ethernet driver"
35     " (" ETH_DRIVER_VERSION ")";
36 
37 struct mlx5e_channel_param {
38 	struct mlx5e_rq_param rq;
39 	struct mlx5e_sq_param sq;
40 	struct mlx5e_cq_param rx_cq;
41 	struct mlx5e_cq_param tx_cq;
42 };
43 
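/*
 * Translation table from firmware link modes (PTYS register bits) to
 * ifmedia subtypes and baudrates. Entries with a zero baudrate are
 * unused and are skipped by the lookup loops below.
 */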
44 static const struct {
45 	u32	subtype;
46 	u64	baudrate;
47 }	mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = {
48 
49 	[MLX5E_1000BASE_CX_SGMII] = {
50 		.subtype = IFM_1000_CX_SGMII,
51 		.baudrate = IF_Mbps(1000ULL),
52 	},
53 	[MLX5E_1000BASE_KX] = {
54 		.subtype = IFM_1000_KX,
55 		.baudrate = IF_Mbps(1000ULL),
56 	},
57 	[MLX5E_10GBASE_CX4] = {
58 		.subtype = IFM_10G_CX4,
59 		.baudrate = IF_Gbps(10ULL),
60 	},
61 	[MLX5E_10GBASE_KX4] = {
62 		.subtype = IFM_10G_KX4,
63 		.baudrate = IF_Gbps(10ULL),
64 	},
65 	[MLX5E_10GBASE_KR] = {
66 		.subtype = IFM_10G_KR,
67 		.baudrate = IF_Gbps(10ULL),
68 	},
69 	[MLX5E_20GBASE_KR2] = {
70 		.subtype = IFM_20G_KR2,
71 		.baudrate = IF_Gbps(20ULL),
72 	},
73 	[MLX5E_40GBASE_CR4] = {
74 		.subtype = IFM_40G_CR4,
75 		.baudrate = IF_Gbps(40ULL),
76 	},
77 	[MLX5E_40GBASE_KR4] = {
78 		.subtype = IFM_40G_KR4,
79 		.baudrate = IF_Gbps(40ULL),
80 	},
81 	[MLX5E_56GBASE_R4] = {
82 		.subtype = IFM_56G_R4,
83 		.baudrate = IF_Gbps(56ULL),
84 	},
85 	[MLX5E_10GBASE_CR] = {
86 		.subtype = IFM_10G_CR1,
87 		.baudrate = IF_Gbps(10ULL),
88 	},
89 	[MLX5E_10GBASE_SR] = {
90 		.subtype = IFM_10G_SR,
91 		.baudrate = IF_Gbps(10ULL),
92 	},
93 	[MLX5E_10GBASE_ER] = {
94 		.subtype = IFM_10G_ER,
95 		.baudrate = IF_Gbps(10ULL),
96 	},
97 	[MLX5E_40GBASE_SR4] = {
98 		.subtype = IFM_40G_SR4,
99 		.baudrate = IF_Gbps(40ULL),
100 	},
101 	[MLX5E_40GBASE_LR4] = {
102 		.subtype = IFM_40G_LR4,
103 		.baudrate = IF_Gbps(40ULL),
104 	},
105 	[MLX5E_100GBASE_CR4] = {
106 		.subtype = IFM_100G_CR4,
107 		.baudrate = IF_Gbps(100ULL),
108 	},
109 	[MLX5E_100GBASE_SR4] = {
110 		.subtype = IFM_100G_SR4,
111 		.baudrate = IF_Gbps(100ULL),
112 	},
113 	[MLX5E_100GBASE_KR4] = {
114 		.subtype = IFM_100G_KR4,
115 		.baudrate = IF_Gbps(100ULL),
116 	},
117 	[MLX5E_100GBASE_LR4] = {
118 		.subtype = IFM_100G_LR4,
119 		.baudrate = IF_Gbps(100ULL),
120 	},
121 	[MLX5E_100BASE_TX] = {
122 		.subtype = IFM_100_TX,
123 		.baudrate = IF_Mbps(100ULL),
124 	},
125 	[MLX5E_1000BASE_T] = {
126 		.subtype = IFM_1000_T,
127 		.baudrate = IF_Mbps(1000ULL),
128 	},
129 	[MLX5E_10GBASE_T] = {
130 		.subtype = IFM_10G_T,
131 		.baudrate = IF_Gbps(10ULL),
132 	},
133 	[MLX5E_25GBASE_CR] = {
134 		.subtype = IFM_25G_CR,
135 		.baudrate = IF_Gbps(25ULL),
136 	},
137 	[MLX5E_25GBASE_KR] = {
138 		.subtype = IFM_25G_KR,
139 		.baudrate = IF_Gbps(25ULL),
140 	},
141 	[MLX5E_25GBASE_SR] = {
142 		.subtype = IFM_25G_SR,
143 		.baudrate = IF_Gbps(25ULL),
144 	},
145 	[MLX5E_50GBASE_CR2] = {
146 		.subtype = IFM_50G_CR2,
147 		.baudrate = IF_Gbps(50ULL),
148 	},
149 	[MLX5E_50GBASE_KR2] = {
150 		.subtype = IFM_50G_KR2,
151 		.baudrate = IF_Gbps(50ULL),
152 	},
153 };
154 
155 MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet");
156 
157 static SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "MLX5 driver parameters");
158 
159 static void
160 mlx5e_update_carrier(struct mlx5e_priv *priv)
161 {
162 	struct mlx5_core_dev *mdev = priv->mdev;
163 	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
164 	u32 eth_proto_oper;
165 	int error;
166 	u8 port_state;
167 	u8 i;
168 
169 	port_state = mlx5_query_vport_state(mdev,
170 	    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
171 
172 	if (port_state == VPORT_STATE_UP) {
173 		priv->media_status_last |= IFM_ACTIVE;
174 	} else {
175 		priv->media_status_last &= ~IFM_ACTIVE;
176 		priv->media_active_last = IFM_ETHER;
177 		if_link_state_change(priv->ifp, LINK_STATE_DOWN);
178 		return;
179 	}
180 
181 	error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1);
182 	if (error) {
183 		priv->media_active_last = IFM_ETHER;
184 		priv->ifp->if_baudrate = 1;
185 		if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n",
186 		    __func__, error);
187 		return;
188 	}
189 	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
190 
191 	for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) {
192 		if (mlx5e_mode_table[i].baudrate == 0)
193 			continue;
194 		if (MLX5E_PROT_MASK(i) & eth_proto_oper) {
195 			priv->ifp->if_baudrate =
196 			    mlx5e_mode_table[i].baudrate;
197 			priv->media_active_last =
198 			    mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX;
199 		}
200 	}
201 	if_link_state_change(priv->ifp, LINK_STATE_UP);
202 }
203 
204 static void
205 mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
206 {
207 	struct mlx5e_priv *priv = dev->if_softc;
208 
209 	ifmr->ifm_status = priv->media_status_last;
210 	ifmr->ifm_active = priv->media_active_last |
211 	    (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) |
212 	    (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0);
213 
214 }
215 
216 static u32
217 mlx5e_find_link_mode(u32 subtype)
218 {
219 	u32 i;
220 	u32 link_mode = 0;
221 
222 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
223 		if (mlx5e_mode_table[i].baudrate == 0)
224 			continue;
225 		if (mlx5e_mode_table[i].subtype == subtype)
226 			link_mode |= MLX5E_PROT_MASK(i);
227 	}
228 
229 	return (link_mode);
230 }
231 
232 static int
233 mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv)
234 {
235 	return (mlx5_set_port_pause_and_pfc(priv->mdev, 1,
236 	    priv->params.rx_pauseframe_control,
237 	    priv->params.tx_pauseframe_control,
238 	    priv->params.rx_priority_flow_control,
239 	    priv->params.tx_priority_flow_control));
240 }
241 
242 static int
243 mlx5e_set_port_pfc(struct mlx5e_priv *priv)
244 {
245 	int error;
246 
247 	if (priv->params.rx_pauseframe_control ||
248 	    priv->params.tx_pauseframe_control) {
249 		if_printf(priv->ifp,
250 		    "Global pauseframes must be disabled before enabling PFC.\n");
251 		error = -EINVAL;
252 	} else {
253 		error = mlx5e_set_port_pause_and_pfc(priv);
254 	}
255 	return (error);
256 }
257 
258 static int
259 mlx5e_media_change(struct ifnet *dev)
260 {
261 	struct mlx5e_priv *priv = dev->if_softc;
262 	struct mlx5_core_dev *mdev = priv->mdev;
263 	u32 eth_proto_cap;
264 	u32 link_mode;
265 	int was_opened;
266 	int locked;
267 	int error;
268 
269 	locked = PRIV_LOCKED(priv);
270 	if (!locked)
271 		PRIV_LOCK(priv);
272 
273 	if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) {
274 		error = EINVAL;
275 		goto done;
276 	}
277 	link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media));
278 
279 	/* query supported capabilities */
280 	error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
281 	if (error != 0) {
282 		if_printf(dev, "Query port media capability failed\n");
283 		goto done;
284 	}
285 	/* check for autoselect */
286 	if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) {
287 		link_mode = eth_proto_cap;
288 		if (link_mode == 0) {
289 			if_printf(dev, "Port media capability is zero\n");
290 			error = EINVAL;
291 			goto done;
292 		}
293 	} else {
294 		link_mode = link_mode & eth_proto_cap;
295 		if (link_mode == 0) {
296 			if_printf(dev, "Unsupported link mode requested\n");
297 			error = EINVAL;
298 			goto done;
299 		}
300 	}
301 	if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) {
302 		/* check if PFC is enabled */
303 		if (priv->params.rx_priority_flow_control ||
304 		    priv->params.tx_priority_flow_control) {
305 			if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n");
306 			error = EINVAL;
307 			goto done;
308 		}
309 	}
310 	/* update pauseframe control bits */
311 	priv->params.rx_pauseframe_control =
312 	    (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0;
313 	priv->params.tx_pauseframe_control =
314 	    (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0;
315 
316 	/* check if device is opened */
317 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
318 
319 	/* reconfigure the hardware */
320 	mlx5_set_port_status(mdev, MLX5_PORT_DOWN);
321 	mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN);
322 	error = -mlx5e_set_port_pause_and_pfc(priv);
323 	if (was_opened)
324 		mlx5_set_port_status(mdev, MLX5_PORT_UP);
325 
326 done:
327 	if (!locked)
328 		PRIV_UNLOCK(priv);
329 	return (error);
330 }
331 
332 static void
333 mlx5e_update_carrier_work(struct work_struct *work)
334 {
335 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
336 	    update_carrier_work);
337 
338 	PRIV_LOCK(priv);
339 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
340 		mlx5e_update_carrier(priv);
341 	PRIV_UNLOCK(priv);
342 }
343 
344 /*
345  * This function reads the physical port counters from the firmware
346  * using a predefined layout described by the various MLX5E_PPORT_XXX()
347  * macros. The output is converted from big-endian 64-bit values into
348  * host-endian ones and stored in the "priv->stats.pport" structure.
349  */
350 static void
351 mlx5e_update_pport_counters(struct mlx5e_priv *priv)
352 {
353 	struct mlx5_core_dev *mdev = priv->mdev;
354 	struct mlx5e_pport_stats *s = &priv->stats.pport;
355 	struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug;
356 	u32 *in;
357 	u32 *out;
358 	const u64 *ptr;
359 	unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
360 	unsigned x;
361 	unsigned y;
362 	unsigned z;
363 
364 	/* allocate firmware request structures */
365 	in = mlx5_vzalloc(sz);
366 	out = mlx5_vzalloc(sz);
367 	if (in == NULL || out == NULL)
368 		goto free_out;
369 
370 	/*
371 	 * Get pointer to the 64-bit counter set which is located at a
372 	 * fixed offset in the output firmware request structure:
373 	 */
374 	ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set);
375 
376 	MLX5_SET(ppcnt_reg, in, local_port, 1);
377 
378 	/* read IEEE802_3 counter group using predefined counter layout */
379 	MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP);
380 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
381 	for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM;
382 	     x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++)
383 		s->arg[y] = be64toh(ptr[x]);
384 
385 	/* read RFC2819 counter group using predefined counter layout */
386 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP);
387 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
388 	for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++)
389 		s->arg[y] = be64toh(ptr[x]);
390 	for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM +
391 	    MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++)
392 		s_debug->arg[y] = be64toh(ptr[x]);
393 
394 	/* read RFC2863 counter group using predefined counter layout */
395 	MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP);
396 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
397 	for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++)
398 		s_debug->arg[y] = be64toh(ptr[x]);
399 
400 	/* read physical layer stats counter group using predefined counter layout */
401 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP);
402 	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
403 	for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++)
404 		s_debug->arg[y] = be64toh(ptr[x]);
405 
406 	/* read per-priority counters */
407 	MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP);
408 
409 	/* iterate all the priorities */
410 	for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) {
411 		MLX5_SET(ppcnt_reg, in, prio_tc, z);
412 		mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0);
413 
414 		/* read per priority stats counter group using predefined counter layout */
415 		for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM /
416 		    MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++)
417 			s->arg[y] = be64toh(ptr[x]);
418 	}
419 free_out:
420 	/* free firmware request structures */
421 	kvfree(in);
422 	kvfree(out);
423 }
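/*
 * A minimal sketch (illustrative only, not driver code) of the copy
 * pattern used above: "x" walks the firmware counter set while "y"
 * keeps advancing, so that successive counter groups land back-to-back
 * in the flat "arg" destination array. The helper name is hypothetical.
 */
#if 0
static unsigned
example_copy_counter_group(u64 *arg, unsigned y, const u64 *ptr,
    unsigned num)
{
	unsigned x;

	for (x = 0; x != num; x++, y++)
		arg[y] = be64toh(ptr[x]);
	return (y);		/* first free index for the next group */
}
#endif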
424 
425 /*
426  * This function is called regularly to collect all statistics
427  * counters from the firmware. The values can be viewed through the
428  * sysctl interface. Execution is serialized using the priv's global
429  * configuration lock.
430  */
431 static void
432 mlx5e_update_stats_work(struct work_struct *work)
433 {
434 	struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
435 	    update_stats_work);
436 	struct mlx5_core_dev *mdev = priv->mdev;
437 	struct mlx5e_vport_stats *s = &priv->stats.vport;
438 	struct mlx5e_rq_stats *rq_stats;
439 	struct mlx5e_sq_stats *sq_stats;
440 	struct buf_ring *sq_br;
441 #if (__FreeBSD_version < 1100000)
442 	struct ifnet *ifp = priv->ifp;
443 #endif
444 
445 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
446 	u32 *out;
447 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
448 	u64 tso_packets = 0;
449 	u64 tso_bytes = 0;
450 	u64 tx_queue_dropped = 0;
451 	u64 tx_defragged = 0;
452 	u64 tx_offload_none = 0;
453 	u64 lro_packets = 0;
454 	u64 lro_bytes = 0;
455 	u64 sw_lro_queued = 0;
456 	u64 sw_lro_flushed = 0;
457 	u64 rx_csum_none = 0;
458 	u64 rx_wqe_err = 0;
459 	u32 rx_out_of_buffer = 0;
460 	int i;
461 	int j;
462 
463 	PRIV_LOCK(priv);
464 	out = mlx5_vzalloc(outlen);
465 	if (out == NULL)
466 		goto free_out;
467 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
468 		goto free_out;
469 
470 	/* Collect the SW counters first and then the HW counters for consistency */
471 	for (i = 0; i < priv->params.num_channels; i++) {
472 		struct mlx5e_rq *rq = &priv->channel[i]->rq;
473 
474 		rq_stats = &priv->channel[i]->rq.stats;
475 
476 		/* collect stats from LRO */
477 		rq_stats->sw_lro_queued = rq->lro.lro_queued;
478 		rq_stats->sw_lro_flushed = rq->lro.lro_flushed;
479 		sw_lro_queued += rq_stats->sw_lro_queued;
480 		sw_lro_flushed += rq_stats->sw_lro_flushed;
481 		lro_packets += rq_stats->lro_packets;
482 		lro_bytes += rq_stats->lro_bytes;
483 		rx_csum_none += rq_stats->csum_none;
484 		rx_wqe_err += rq_stats->wqe_err;
485 
486 		for (j = 0; j < priv->num_tc; j++) {
487 			sq_stats = &priv->channel[i]->sq[j].stats;
488 			sq_br = priv->channel[i]->sq[j].br;
489 
490 			tso_packets += sq_stats->tso_packets;
491 			tso_bytes += sq_stats->tso_bytes;
492 			tx_queue_dropped += sq_stats->dropped;
493 			if (sq_br != NULL)
494 				tx_queue_dropped += sq_br->br_drops;
495 			tx_defragged += sq_stats->defragged;
496 			tx_offload_none += sq_stats->csum_offload_none;
497 		}
498 	}
499 
500 	/* update counters */
501 	s->tso_packets = tso_packets;
502 	s->tso_bytes = tso_bytes;
503 	s->tx_queue_dropped = tx_queue_dropped;
504 	s->tx_defragged = tx_defragged;
505 	s->lro_packets = lro_packets;
506 	s->lro_bytes = lro_bytes;
507 	s->sw_lro_queued = sw_lro_queued;
508 	s->sw_lro_flushed = sw_lro_flushed;
509 	s->rx_csum_none = rx_csum_none;
510 	s->rx_wqe_err = rx_wqe_err;
511 
512 	/* HW counters */
513 	memset(in, 0, sizeof(in));
514 
515 	MLX5_SET(query_vport_counter_in, in, opcode,
516 	    MLX5_CMD_OP_QUERY_VPORT_COUNTER);
517 	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
518 	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
519 
520 	memset(out, 0, outlen);
521 
522 	/* get number of out-of-buffer drops first */
523 	if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id,
524 	    &rx_out_of_buffer))
525 		goto free_out;
526 
527 	/* accumulate difference into a 64-bit counter */
528 	s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev);
529 	s->rx_out_of_buffer_prev = rx_out_of_buffer;
530 
531 	/* get port statistics */
532 	if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen))
533 		goto free_out;
534 
535 #define	MLX5_GET_CTR(out, x) \
536 	MLX5_GET64(query_vport_counter_out, out, x)
537 
538 	s->rx_error_packets =
539 	    MLX5_GET_CTR(out, received_errors.packets);
540 	s->rx_error_bytes =
541 	    MLX5_GET_CTR(out, received_errors.octets);
542 	s->tx_error_packets =
543 	    MLX5_GET_CTR(out, transmit_errors.packets);
544 	s->tx_error_bytes =
545 	    MLX5_GET_CTR(out, transmit_errors.octets);
546 
547 	s->rx_unicast_packets =
548 	    MLX5_GET_CTR(out, received_eth_unicast.packets);
549 	s->rx_unicast_bytes =
550 	    MLX5_GET_CTR(out, received_eth_unicast.octets);
551 	s->tx_unicast_packets =
552 	    MLX5_GET_CTR(out, transmitted_eth_unicast.packets);
553 	s->tx_unicast_bytes =
554 	    MLX5_GET_CTR(out, transmitted_eth_unicast.octets);
555 
556 	s->rx_multicast_packets =
557 	    MLX5_GET_CTR(out, received_eth_multicast.packets);
558 	s->rx_multicast_bytes =
559 	    MLX5_GET_CTR(out, received_eth_multicast.octets);
560 	s->tx_multicast_packets =
561 	    MLX5_GET_CTR(out, transmitted_eth_multicast.packets);
562 	s->tx_multicast_bytes =
563 	    MLX5_GET_CTR(out, transmitted_eth_multicast.octets);
564 
565 	s->rx_broadcast_packets =
566 	    MLX5_GET_CTR(out, received_eth_broadcast.packets);
567 	s->rx_broadcast_bytes =
568 	    MLX5_GET_CTR(out, received_eth_broadcast.octets);
569 	s->tx_broadcast_packets =
570 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.packets);
571 	s->tx_broadcast_bytes =
572 	    MLX5_GET_CTR(out, transmitted_eth_broadcast.octets);
573 
574 	s->rx_packets =
575 	    s->rx_unicast_packets +
576 	    s->rx_multicast_packets +
577 	    s->rx_broadcast_packets -
578 	    s->rx_out_of_buffer;
579 	s->rx_bytes =
580 	    s->rx_unicast_bytes +
581 	    s->rx_multicast_bytes +
582 	    s->rx_broadcast_bytes;
583 	s->tx_packets =
584 	    s->tx_unicast_packets +
585 	    s->tx_multicast_packets +
586 	    s->tx_broadcast_packets;
587 	s->tx_bytes =
588 	    s->tx_unicast_bytes +
589 	    s->tx_multicast_bytes +
590 	    s->tx_broadcast_bytes;
591 
592 	/* Update calculated offload counters */
593 	s->tx_csum_offload = s->tx_packets - tx_offload_none;
594 	s->rx_csum_good = s->rx_packets - s->rx_csum_none;
595 
596 	/* Get physical port counters */
597 	mlx5e_update_pport_counters(priv);
598 
599 #if (__FreeBSD_version < 1100000)
600 	/* no get_counters interface in FreeBSD 10 */
601 	ifp->if_ipackets = s->rx_packets;
602 	ifp->if_ierrors = s->rx_error_packets +
603 	    priv->stats.pport.alignment_err +
604 	    priv->stats.pport.check_seq_err +
605 	    priv->stats.pport.crc_align_errors +
606 	    priv->stats.pport.in_range_len_errors +
607 	    priv->stats.pport.jabbers +
608 	    priv->stats.pport.out_of_range_len +
609 	    priv->stats.pport.oversize_pkts +
610 	    priv->stats.pport.symbol_err +
611 	    priv->stats.pport.too_long_errors +
612 	    priv->stats.pport.undersize_pkts +
613 	    priv->stats.pport.unsupported_op_rx;
614 	ifp->if_iqdrops = s->rx_out_of_buffer +
615 	    priv->stats.pport.drop_events;
616 	ifp->if_opackets = s->tx_packets;
617 	ifp->if_oerrors = s->tx_error_packets;
618 	ifp->if_snd.ifq_drops = s->tx_queue_dropped;
619 	ifp->if_ibytes = s->rx_bytes;
620 	ifp->if_obytes = s->tx_bytes;
621 	ifp->if_collisions =
622 	    priv->stats.pport.collisions;
623 #endif
624 
625 free_out:
626 	kvfree(out);
627 
628 	/* Update diagnostics, if any */
629 	if (priv->params_ethtool.diag_pci_enable ||
630 	    priv->params_ethtool.diag_general_enable) {
631 		int error = mlx5_core_get_diagnostics_full(mdev,
632 		    priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL,
633 		    priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL);
634 		if (error != 0)
635 			if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error);
636 	}
637 	PRIV_UNLOCK(priv);
638 }
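/*
 * A minimal sketch (illustrative only; the helper is hypothetical) of
 * the wrap-safe accumulation used for "rx_out_of_buffer" above: the
 * hardware counter is only 32 bits wide and may wrap, so only the
 * unsigned 32-bit difference from the previous reading is added to the
 * 64-bit software total.
 */
#if 0
static void
example_accumulate_u32(u64 *total, u32 *prev, u32 now)
{
	*total += (u64)(u32)(now - *prev);
	*prev = now;
}
#endif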
639 
640 static void
641 mlx5e_update_stats(void *arg)
642 {
643 	struct mlx5e_priv *priv = arg;
644 
645 	queue_work(priv->wq, &priv->update_stats_work);
646 
647 	callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv);
648 }
649 
650 static void
651 mlx5e_async_event_sub(struct mlx5e_priv *priv,
652     enum mlx5_dev_event event)
653 {
654 	switch (event) {
655 	case MLX5_DEV_EVENT_PORT_UP:
656 	case MLX5_DEV_EVENT_PORT_DOWN:
657 		queue_work(priv->wq, &priv->update_carrier_work);
658 		break;
659 
660 	default:
661 		break;
662 	}
663 }
664 
665 static void
666 mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
667     enum mlx5_dev_event event, unsigned long param)
668 {
669 	struct mlx5e_priv *priv = vpriv;
670 
671 	mtx_lock(&priv->async_events_mtx);
672 	if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state))
673 		mlx5e_async_event_sub(priv, event);
674 	mtx_unlock(&priv->async_events_mtx);
675 }
676 
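/*
 * Note on locking: enabling async events sets the state bit without
 * taking the async events mutex, which is harmless. Disabling does
 * take the mutex so that, once mlx5e_disable_async_events() returns,
 * no handler dispatched from mlx5e_async_event() can still be running.
 */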
677 static void
678 mlx5e_enable_async_events(struct mlx5e_priv *priv)
679 {
680 	set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
681 }
682 
683 static void
684 mlx5e_disable_async_events(struct mlx5e_priv *priv)
685 {
686 	mtx_lock(&priv->async_events_mtx);
687 	clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state);
688 	mtx_unlock(&priv->async_events_mtx);
689 }
690 
691 static void mlx5e_calibration_callout(void *arg);
692 static int mlx5e_calibration_duration = 20;
693 static int mlx5e_fast_calibration = 1;
694 static int mlx5e_normal_calibration = 30;
695 
696 static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
697     "MLX5 timestamp calibration parameters");
698 
699 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
700     &mlx5e_calibration_duration, 0,
701     "Duration of initial calibration");
702 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
703     &mlx5e_fast_calibration, 0,
704     "Recalibration interval during initial calibration");
705 SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
706     &mlx5e_normal_calibration, 0,
707     "Recalibration interval during normal operations");
708 
709 /*
710  * Starts the calibration process, or re-arms the calibration callout.
711  */
712 static void
713 mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
714 {
715 
716 	if (priv->clbr_done == 0)
717 		mlx5e_calibration_callout(priv);
718 	else
719 		callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
720 		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
721 		    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
722 		    priv);
723 }
724 
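/*
 * NB: despite the "usec" in its name, this helper returns nanoseconds;
 * tv_sec is scaled by 1000000000 and tv_nsec is added unscaled.
 */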
725 static uint64_t
726 mlx5e_timespec2usec(const struct timespec *ts)
727 {
728 
729 	return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
730 }
731 
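/*
 * Sample the 64-bit free-running hardware clock from the device's
 * initialization segment. The high word is read before and after the
 * low word; if it changed in between, the low word wrapped during the
 * read and the sample is retried.
 */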
732 static uint64_t
733 mlx5e_hw_clock(struct mlx5e_priv *priv)
734 {
735 	struct mlx5_init_seg *iseg;
736 	uint32_t hw_h, hw_h1, hw_l;
737 
738 	iseg = priv->mdev->iseg;
739 	do {
740 		hw_h = ioread32be(&iseg->internal_timer_h);
741 		hw_l = ioread32be(&iseg->internal_timer_l);
742 		hw_h1 = ioread32be(&iseg->internal_timer_h);
743 	} while (hw_h1 != hw_h);
744 	return (((uint64_t)hw_h << 32) | hw_l);
745 }
746 
747 /*
748  * The calibration callout.  It runs either in the context of the
749  * thread which enables calibration, or from the callout subsystem.
750  * It takes a snapshot of the system and adapter clocks, then advances
751  * the pointers to the calibration point so that the RX path can read
752  * consistent data locklessly.
753  */
754 static void
755 mlx5e_calibration_callout(void *arg)
756 {
757 	struct mlx5e_priv *priv;
758 	struct mlx5e_clbr_point *next, *curr;
759 	struct timespec ts;
760 	int clbr_curr_next;
761 
762 	priv = arg;
763 	curr = &priv->clbr_points[priv->clbr_curr];
764 	clbr_curr_next = priv->clbr_curr + 1;
765 	if (clbr_curr_next >= nitems(priv->clbr_points))
766 		clbr_curr_next = 0;
767 	next = &priv->clbr_points[clbr_curr_next];
768 
769 	next->base_prev = curr->base_curr;
770 	next->clbr_hw_prev = curr->clbr_hw_curr;
771 
772 	next->clbr_hw_curr = mlx5e_hw_clock(priv);
773 	if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) ==
774 	    0) {
775 		if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx, disabling\n",
776 		    (uintmax_t)next->clbr_hw_curr, (uintmax_t)curr->clbr_hw_prev);
777 		priv->clbr_done = 0;
778 		return;
779 	}
780 
781 	nanouptime(&ts);
782 	next->base_curr = mlx5e_timespec2usec(&ts);
783 
784 	curr->clbr_gen = 0;
785 	atomic_thread_fence_rel();
786 	priv->clbr_curr = clbr_curr_next;
787 	atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen));
788 
789 	if (priv->clbr_done < mlx5e_calibration_duration)
790 		priv->clbr_done++;
791 	mlx5e_reset_calibration_callout(priv);
792 }
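/*
 * A sketch (illustrative only; the real reader lives in the RX path)
 * of the lockless read matching the generation protocol above: sample
 * the generation with acquire semantics, copy the calibration point,
 * and retry if the writer zeroed or advanced the generation meanwhile.
 * The structure and field names follow this file; the function itself
 * is hypothetical.
 */
#if 0
static int
example_read_clbr_point(struct mlx5e_priv *priv, struct mlx5e_clbr_point *out)
{
	struct mlx5e_clbr_point *cp;
	u_int gen;

	do {
		cp = &priv->clbr_points[priv->clbr_curr];
		gen = atomic_load_acq_int(&cp->clbr_gen);
		if (gen == 0)
			return (EAGAIN);	/* calibration disabled */
		*out = *cp;
		atomic_thread_fence_acq();
	} while (gen != cp->clbr_gen);
	return (0);
}
#endif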
793 
794 static const char *mlx5e_rq_stats_desc[] = {
795 	MLX5E_RQ_STATS(MLX5E_STATS_DESC)
796 };
797 
798 static int
799 mlx5e_create_rq(struct mlx5e_channel *c,
800     struct mlx5e_rq_param *param,
801     struct mlx5e_rq *rq)
802 {
803 	struct mlx5e_priv *priv = c->priv;
804 	struct mlx5_core_dev *mdev = priv->mdev;
805 	char buffer[16];
806 	void *rqc = param->rqc;
807 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
808 	int wq_sz;
809 	int err;
810 	int i;
811 
812 	/* Create DMA descriptor TAG */
813 	if ((err = -bus_dma_tag_create(
814 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
815 	    1,				/* any alignment */
816 	    0,				/* no boundary */
817 	    BUS_SPACE_MAXADDR,		/* lowaddr */
818 	    BUS_SPACE_MAXADDR,		/* highaddr */
819 	    NULL, NULL,			/* filter, filterarg */
820 	    MJUM16BYTES,		/* maxsize */
821 	    1,				/* nsegments */
822 	    MJUM16BYTES,		/* maxsegsize */
823 	    0,				/* flags */
824 	    NULL, NULL,			/* lockfunc, lockfuncarg */
825 	    &rq->dma_tag)))
826 		goto done;
827 
828 	err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq,
829 	    &rq->wq_ctrl);
830 	if (err)
831 		goto err_free_dma_tag;
832 
833 	rq->wq.db = &rq->wq.db[MLX5_RCV_DBR];
834 
835 	if (priv->params.hw_lro_en) {
836 		rq->wqe_sz = priv->params.lro_wqe_sz;
837 	} else {
838 		rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu);
839 	}
840 	if (rq->wqe_sz > MJUM16BYTES) {
841 		err = -ENOMEM;
842 		goto err_rq_wq_destroy;
843 	} else if (rq->wqe_sz > MJUM9BYTES) {
844 		rq->wqe_sz = MJUM16BYTES;
845 	} else if (rq->wqe_sz > MJUMPAGESIZE) {
846 		rq->wqe_sz = MJUM9BYTES;
847 	} else if (rq->wqe_sz > MCLBYTES) {
848 		rq->wqe_sz = MJUMPAGESIZE;
849 	} else {
850 		rq->wqe_sz = MCLBYTES;
851 	}
852 
853 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
854 
855 	err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz);
856 	if (err)
857 		goto err_rq_wq_destroy;
858 
859 	rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
860 	for (i = 0; i != wq_sz; i++) {
861 		struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i);
862 		uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN;
863 
864 		err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map);
865 		if (err != 0) {
866 			while (i--)
867 				bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
868 			goto err_rq_mbuf_free;
869 		}
870 		wqe->data.lkey = c->mkey_be;
871 		wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING);
872 	}
873 
874 	rq->ifp = c->ifp;
875 	rq->channel = c;
876 	rq->ix = c->ix;
877 
878 	snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix);
879 	mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
880 	    buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM,
881 	    rq->stats.arg);
882 	return (0);
883 
884 err_rq_mbuf_free:
885 	free(rq->mbuf, M_MLX5EN);
886 	tcp_lro_free(&rq->lro);
887 err_rq_wq_destroy:
888 	mlx5_wq_destroy(&rq->wq_ctrl);
889 err_free_dma_tag:
890 	bus_dma_tag_destroy(rq->dma_tag);
891 done:
892 	return (err);
893 }
894 
895 static void
896 mlx5e_destroy_rq(struct mlx5e_rq *rq)
897 {
898 	int wq_sz;
899 	int i;
900 
901 	/* destroy all sysctl nodes */
902 	sysctl_ctx_free(&rq->stats.ctx);
903 
904 	/* free leftover LRO packets, if any */
905 	tcp_lro_free(&rq->lro);
906 
907 	wq_sz = mlx5_wq_ll_get_size(&rq->wq);
908 	for (i = 0; i != wq_sz; i++) {
909 		if (rq->mbuf[i].mbuf != NULL) {
910 			bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map);
911 			m_freem(rq->mbuf[i].mbuf);
912 		}
913 		bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map);
914 	}
915 	free(rq->mbuf, M_MLX5EN);
916 	mlx5_wq_destroy(&rq->wq_ctrl);
917 }
918 
919 static int
920 mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
921 {
922 	struct mlx5e_channel *c = rq->channel;
923 	struct mlx5e_priv *priv = c->priv;
924 	struct mlx5_core_dev *mdev = priv->mdev;
925 
926 	void *in;
927 	void *rqc;
928 	void *wq;
929 	int inlen;
930 	int err;
931 
932 	inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
933 	    sizeof(u64) * rq->wq_ctrl.buf.npages;
934 	in = mlx5_vzalloc(inlen);
935 	if (in == NULL)
936 		return (-ENOMEM);
937 
938 	rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
939 	wq = MLX5_ADDR_OF(rqc, rqc, wq);
940 
941 	memcpy(rqc, param->rqc, sizeof(param->rqc));
942 
943 	MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn);
944 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
945 	MLX5_SET(rqc, rqc, flush_in_error_en, 1);
946 	if (priv->counter_set_id >= 0)
947 		MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id);
948 	MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift -
949 	    PAGE_SHIFT);
950 	MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
951 
952 	mlx5_fill_page_array(&rq->wq_ctrl.buf,
953 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
954 
955 	err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
956 
957 	kvfree(in);
958 
959 	return (err);
960 }
961 
962 static int
963 mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state)
964 {
965 	struct mlx5e_channel *c = rq->channel;
966 	struct mlx5e_priv *priv = c->priv;
967 	struct mlx5_core_dev *mdev = priv->mdev;
968 
969 	void *in;
970 	void *rqc;
971 	int inlen;
972 	int err;
973 
974 	inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
975 	in = mlx5_vzalloc(inlen);
976 	if (in == NULL)
977 		return (-ENOMEM);
978 
979 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
980 
981 	MLX5_SET(modify_rq_in, in, rqn, rq->rqn);
982 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
983 	MLX5_SET(rqc, rqc, state, next_state);
984 
985 	err = mlx5_core_modify_rq(mdev, in, inlen);
986 
987 	kvfree(in);
988 
989 	return (err);
990 }
991 
992 static void
993 mlx5e_disable_rq(struct mlx5e_rq *rq)
994 {
995 	struct mlx5e_channel *c = rq->channel;
996 	struct mlx5e_priv *priv = c->priv;
997 	struct mlx5_core_dev *mdev = priv->mdev;
998 
999 	mlx5_core_destroy_rq(mdev, rq->rqn);
1000 }
1001 
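/*
 * Poll until the receive queue has been populated with at least the
 * configured minimum number of receive WQEs, sleeping briefly between
 * checks and giving up after 1000 attempts.
 */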
1002 static int
1003 mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq)
1004 {
1005 	struct mlx5e_channel *c = rq->channel;
1006 	struct mlx5e_priv *priv = c->priv;
1007 	struct mlx5_wq_ll *wq = &rq->wq;
1008 	int i;
1009 
1010 	for (i = 0; i < 1000; i++) {
1011 		if (wq->cur_sz >= priv->params.min_rx_wqes)
1012 			return (0);
1013 
1014 		msleep(4);
1015 	}
1016 	return (-ETIMEDOUT);
1017 }
1018 
1019 static int
1020 mlx5e_open_rq(struct mlx5e_channel *c,
1021     struct mlx5e_rq_param *param,
1022     struct mlx5e_rq *rq)
1023 {
1024 	int err;
1025 
1026 	err = mlx5e_create_rq(c, param, rq);
1027 	if (err)
1028 		return (err);
1029 
1030 	err = mlx5e_enable_rq(rq, param);
1031 	if (err)
1032 		goto err_destroy_rq;
1033 
1034 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
1035 	if (err)
1036 		goto err_disable_rq;
1037 
1038 	c->rq.enabled = 1;
1039 
1040 	return (0);
1041 
1042 err_disable_rq:
1043 	mlx5e_disable_rq(rq);
1044 err_destroy_rq:
1045 	mlx5e_destroy_rq(rq);
1046 
1047 	return (err);
1048 }
1049 
1050 static void
1051 mlx5e_close_rq(struct mlx5e_rq *rq)
1052 {
1053 	mtx_lock(&rq->mtx);
1054 	rq->enabled = 0;
1055 	callout_stop(&rq->watchdog);
1056 	mtx_unlock(&rq->mtx);
1057 
1058 	callout_drain(&rq->watchdog);
1059 
1060 	mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
1061 }
1062 
1063 static void
1064 mlx5e_close_rq_wait(struct mlx5e_rq *rq)
1065 {
1066 	struct mlx5_core_dev *mdev = rq->channel->priv->mdev;
1067 
1068 	/* wait till RQ is empty */
1069 	while (!mlx5_wq_ll_is_empty(&rq->wq) &&
1070 	       (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
1071 		msleep(4);
1072 		rq->cq.mcq.comp(&rq->cq.mcq);
1073 	}
1074 
1075 	mlx5e_disable_rq(rq);
1076 	mlx5e_destroy_rq(rq);
1077 }
1078 
1079 void
1080 mlx5e_free_sq_db(struct mlx5e_sq *sq)
1081 {
1082 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1083 	int x;
1084 
1085 	for (x = 0; x != wq_sz; x++)
1086 		bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1087 	free(sq->mbuf, M_MLX5EN);
1088 }
1089 
1090 int
1091 mlx5e_alloc_sq_db(struct mlx5e_sq *sq)
1092 {
1093 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1094 	int err;
1095 	int x;
1096 
1097 	sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO);
1098 
1099 	/* Create DMA descriptor MAPs */
1100 	for (x = 0; x != wq_sz; x++) {
1101 		err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map);
1102 		if (err != 0) {
1103 			while (x--)
1104 				bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map);
1105 			free(sq->mbuf, M_MLX5EN);
1106 			return (err);
1107 		}
1108 	}
1109 	return (0);
1110 }
1111 
1112 static const char *mlx5e_sq_stats_desc[] = {
1113 	MLX5E_SQ_STATS(MLX5E_STATS_DESC)
1114 };
1115 
1116 static int
1117 mlx5e_create_sq(struct mlx5e_channel *c,
1118     int tc,
1119     struct mlx5e_sq_param *param,
1120     struct mlx5e_sq *sq)
1121 {
1122 	struct mlx5e_priv *priv = c->priv;
1123 	struct mlx5_core_dev *mdev = priv->mdev;
1124 	char buffer[16];
1125 
1126 	void *sqc = param->sqc;
1127 	void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq);
1128 #ifdef RSS
1129 	cpuset_t cpu_mask;
1130 	int cpu_id;
1131 #endif
1132 	int err;
1133 
1134 	/* Create DMA descriptor TAG */
1135 	if ((err = -bus_dma_tag_create(
1136 	    bus_get_dma_tag(mdev->pdev->dev.bsddev),
1137 	    1,				/* any alignment */
1138 	    0,				/* no boundary */
1139 	    BUS_SPACE_MAXADDR,		/* lowaddr */
1140 	    BUS_SPACE_MAXADDR,		/* highaddr */
1141 	    NULL, NULL,			/* filter, filterarg */
1142 	    MLX5E_MAX_TX_PAYLOAD_SIZE,	/* maxsize */
1143 	    MLX5E_MAX_TX_MBUF_FRAGS,	/* nsegments */
1144 	    MLX5E_MAX_TX_MBUF_SIZE,	/* maxsegsize */
1145 	    0,				/* flags */
1146 	    NULL, NULL,			/* lockfunc, lockfuncarg */
1147 	    &sq->dma_tag)))
1148 		goto done;
1149 
1150 	err = mlx5_alloc_map_uar(mdev, &sq->uar);
1151 	if (err)
1152 		goto err_free_dma_tag;
1153 
1154 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq,
1155 	    &sq->wq_ctrl);
1156 	if (err)
1157 		goto err_unmap_free_uar;
1158 
1159 	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
1160 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
1161 
1162 	err = mlx5e_alloc_sq_db(sq);
1163 	if (err)
1164 		goto err_sq_wq_destroy;
1165 
1166 	sq->mkey_be = c->mkey_be;
1167 	sq->ifp = priv->ifp;
1168 	sq->priv = priv;
1169 	sq->tc = tc;
1170 
1171 	/* check if we should allocate a second packet buffer */
1172 	if (priv->params_ethtool.tx_bufring_disable == 0) {
1173 		sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN,
1174 		    M_WAITOK, &sq->lock);
1175 		if (sq->br == NULL) {
1176 			if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n",
1177 			    __func__);
1178 			err = -ENOMEM;
1179 			goto err_free_sq_db;
1180 		}
1181 
1182 		sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK,
1183 		    taskqueue_thread_enqueue, &sq->sq_tq);
1184 		if (sq->sq_tq == NULL) {
1185 			if_printf(c->ifp, "%s: Failed allocating taskqueue\n",
1186 			    __func__);
1187 			err = -ENOMEM;
1188 			goto err_free_drbr;
1189 		}
1190 
1191 		TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq);
1192 #ifdef RSS
1193 		cpu_id = rss_getcpu(c->ix % rss_getnumbuckets());
1194 		CPU_SETOF(cpu_id, &cpu_mask);
1195 		taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask,
1196 		    "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id);
1197 #else
1198 		taskqueue_start_threads(&sq->sq_tq, 1, PI_NET,
1199 		    "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc);
1200 #endif
1201 	}
1202 	snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc);
1203 	mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
1204 	    buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM,
1205 	    sq->stats.arg);
1206 
1207 	return (0);
1208 
1209 err_free_drbr:
1210 	buf_ring_free(sq->br, M_MLX5EN);
1211 err_free_sq_db:
1212 	mlx5e_free_sq_db(sq);
1213 err_sq_wq_destroy:
1214 	mlx5_wq_destroy(&sq->wq_ctrl);
1215 
1216 err_unmap_free_uar:
1217 	mlx5_unmap_free_uar(mdev, &sq->uar);
1218 
1219 err_free_dma_tag:
1220 	bus_dma_tag_destroy(sq->dma_tag);
1221 done:
1222 	return (err);
1223 }
1224 
1225 static void
1226 mlx5e_destroy_sq(struct mlx5e_sq *sq)
1227 {
1228 	/* destroy all sysctl nodes */
1229 	sysctl_ctx_free(&sq->stats.ctx);
1230 
1231 	mlx5e_free_sq_db(sq);
1232 	mlx5_wq_destroy(&sq->wq_ctrl);
1233 	mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar);
1234 	if (sq->sq_tq != NULL) {
1235 		taskqueue_drain(sq->sq_tq, &sq->sq_task);
1236 		taskqueue_free(sq->sq_tq);
1237 	}
1238 	if (sq->br != NULL)
1239 		buf_ring_free(sq->br, M_MLX5EN);
1240 }
1241 
1242 int
1243 mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param,
1244     int tis_num)
1245 {
1246 	void *in;
1247 	void *sqc;
1248 	void *wq;
1249 	int inlen;
1250 	int err;
1251 
1252 	inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1253 	    sizeof(u64) * sq->wq_ctrl.buf.npages;
1254 	in = mlx5_vzalloc(inlen);
1255 	if (in == NULL)
1256 		return (-ENOMEM);
1257 
1258 	sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1259 	wq = MLX5_ADDR_OF(sqc, sqc, wq);
1260 
1261 	memcpy(sqc, param->sqc, sizeof(param->sqc));
1262 
1263 	MLX5_SET(sqc, sqc, tis_num_0, tis_num);
1264 	MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn);
1265 	MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
1266 	MLX5_SET(sqc, sqc, tis_lst_sz, 1);
1267 	MLX5_SET(sqc, sqc, flush_in_error_en, 1);
1268 
1269 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
1270 	MLX5_SET(wq, wq, uar_page, sq->uar.index);
1271 	MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
1272 	    PAGE_SHIFT);
1273 	MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
1274 
1275 	mlx5_fill_page_array(&sq->wq_ctrl.buf,
1276 	    (__be64 *) MLX5_ADDR_OF(wq, wq, pas));
1277 
1278 	err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn);
1279 
1280 	kvfree(in);
1281 
1282 	return (err);
1283 }
1284 
1285 int
1286 mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state)
1287 {
1288 	void *in;
1289 	void *sqc;
1290 	int inlen;
1291 	int err;
1292 
1293 	inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1294 	in = mlx5_vzalloc(inlen);
1295 	if (in == NULL)
1296 		return (-ENOMEM);
1297 
1298 	sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1299 
1300 	MLX5_SET(modify_sq_in, in, sqn, sq->sqn);
1301 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
1302 	MLX5_SET(sqc, sqc, state, next_state);
1303 
1304 	err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen);
1305 
1306 	kvfree(in);
1307 
1308 	return (err);
1309 }
1310 
1311 void
1312 mlx5e_disable_sq(struct mlx5e_sq *sq)
1313 {
1314 
1315 	mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn);
1316 }
1317 
1318 static int
1319 mlx5e_open_sq(struct mlx5e_channel *c,
1320     int tc,
1321     struct mlx5e_sq_param *param,
1322     struct mlx5e_sq *sq)
1323 {
1324 	int err;
1325 
1326 	err = mlx5e_create_sq(c, tc, param, sq);
1327 	if (err)
1328 		return (err);
1329 
1330 	err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]);
1331 	if (err)
1332 		goto err_destroy_sq;
1333 
1334 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY);
1335 	if (err)
1336 		goto err_disable_sq;
1337 
1338 	atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY);
1339 
1340 	return (0);
1341 
1342 err_disable_sq:
1343 	mlx5e_disable_sq(sq);
1344 err_destroy_sq:
1345 	mlx5e_destroy_sq(sq);
1346 
1347 	return (err);
1348 }
1349 
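/*
 * Pad the send queue with NOP work requests until the completion event
 * counter has been consumed, so that a completion event is eventually
 * generated. With "can_sleep" set the routine waits for ring space;
 * otherwise it stops early. Any pending doorbell write is flushed
 * before returning.
 */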
1350 static void
1351 mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep)
1352 {
1353 	/* fill up remainder with NOPs */
1354 	while (sq->cev_counter != 0) {
1355 		while (!mlx5e_sq_has_room_for(sq, 1)) {
1356 			if (can_sleep != 0) {
1357 				mtx_unlock(&sq->lock);
1358 				msleep(4);
1359 				mtx_lock(&sq->lock);
1360 			} else {
1361 				goto done;
1362 			}
1363 		}
1364 		/* send a single NOP */
1365 		mlx5e_send_nop(sq, 1);
1366 		atomic_thread_fence_rel();
1367 	}
1368 done:
1369 	/* Check if we need to write the doorbell */
1370 	if (likely(sq->doorbell.d64 != 0)) {
1371 		mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
1372 		sq->doorbell.d64 = 0;
1373 	}
1374 }
1375 
1376 void
1377 mlx5e_sq_cev_timeout(void *arg)
1378 {
1379 	struct mlx5e_sq *sq = arg;
1380 
1381 	mtx_assert(&sq->lock, MA_OWNED);
1382 
1383 	/* check next state */
1384 	switch (sq->cev_next_state) {
1385 	case MLX5E_CEV_STATE_SEND_NOPS:
1386 		/* fill TX ring with NOPs, if any */
1387 		mlx5e_sq_send_nops_locked(sq, 0);
1388 
1389 		/* check if completed */
1390 		if (sq->cev_counter == 0) {
1391 			sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
1392 			return;
1393 		}
1394 		break;
1395 	default:
1396 		/* send NOPs on next timeout */
1397 		sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS;
1398 		break;
1399 	}
1400 
1401 	/* restart timer */
1402 	callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq);
1403 }
1404 
1405 void
1406 mlx5e_drain_sq(struct mlx5e_sq *sq)
1407 {
1408 	int error;
1409 	struct mlx5_core_dev *mdev = sq->priv->mdev;
1410 
1411 	/*
1412 	 * Check if already stopped.
1413 	 *
1414 	 * NOTE: The "stopped" variable is only written when both the
1415 	 * priv's configuration lock and the SQ's lock are held. It
1416 	 * can therefore safely be read while holding only one of the
1417 	 * two locks. This function is always called with the priv's
1418 	 * configuration lock held.
1419 	 */
1420 	if (sq->stopped != 0)
1421 		return;
1422 
1423 	mtx_lock(&sq->lock);
1424 
1425 	/* don't put more packets into the SQ */
1426 	sq->stopped = 1;
1427 
1428 	/* teardown event factor timer, if any */
1429 	sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS;
1430 	callout_stop(&sq->cev_callout);
1431 
1432 	/* send dummy NOPs in order to flush the transmit ring */
1433 	mlx5e_sq_send_nops_locked(sq, 1);
1434 	mtx_unlock(&sq->lock);
1435 
1436 	/* make sure it is safe to free the callout */
1437 	callout_drain(&sq->cev_callout);
1438 
1439 	/* wait till SQ is empty or link is down */
1440 	mtx_lock(&sq->lock);
1441 	while (sq->cc != sq->pc &&
1442 	    (sq->priv->media_status_last & IFM_ACTIVE) != 0 &&
1443 	    mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1444 		mtx_unlock(&sq->lock);
1445 		msleep(1);
1446 		sq->cq.mcq.comp(&sq->cq.mcq);
1447 		mtx_lock(&sq->lock);
1448 	}
1449 	mtx_unlock(&sq->lock);
1450 
1451 	/* error out remaining requests */
1452 	error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR);
1453 	if (error != 0) {
1454 		if_printf(sq->ifp,
1455 		    "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error);
1456 	}
1457 
1458 	/* wait till SQ is empty */
1459 	mtx_lock(&sq->lock);
1460 	while (sq->cc != sq->pc &&
1461 	       mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1462 		mtx_unlock(&sq->lock);
1463 		msleep(1);
1464 		sq->cq.mcq.comp(&sq->cq.mcq);
1465 		mtx_lock(&sq->lock);
1466 	}
1467 	mtx_unlock(&sq->lock);
1468 }
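/*
 * A minimal sketch (hypothetical helper, illustrative only) of the
 * locking rule from the NOTE above: because "stopped" is written only
 * with both the configuration lock and the SQ lock held, a reader
 * holding either lock observes a stable value.
 */
#if 0
static int
example_sq_is_stopped(struct mlx5e_sq *sq)
{
	int stopped;

	mtx_lock(&sq->lock);	/* holding PRIV_LOCK() instead also works */
	stopped = sq->stopped;
	mtx_unlock(&sq->lock);
	return (stopped);
}
#endif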
1469 
1470 static void
1471 mlx5e_close_sq_wait(struct mlx5e_sq *sq)
1472 {
1473 
1474 	mlx5e_drain_sq(sq);
1475 	mlx5e_disable_sq(sq);
1476 	mlx5e_destroy_sq(sq);
1477 }
1478 
1479 static int
1480 mlx5e_create_cq(struct mlx5e_priv *priv,
1481     struct mlx5e_cq_param *param,
1482     struct mlx5e_cq *cq,
1483     mlx5e_cq_comp_t *comp,
1484     int eq_ix)
1485 {
1486 	struct mlx5_core_dev *mdev = priv->mdev;
1487 	struct mlx5_core_cq *mcq = &cq->mcq;
1488 	int eqn_not_used;
1489 	int irqn;
1490 	int err;
1491 	u32 i;
1492 
1493 	param->wq.buf_numa_node = 0;
1494 	param->wq.db_numa_node = 0;
1495 
1496 	err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1497 	    &cq->wq_ctrl);
1498 	if (err)
1499 		return (err);
1500 
1501 	mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn);
1502 
1503 	mcq->cqe_sz = 64;
1504 	mcq->set_ci_db = cq->wq_ctrl.db.db;
1505 	mcq->arm_db = cq->wq_ctrl.db.db + 1;
1506 	*mcq->set_ci_db = 0;
1507 	*mcq->arm_db = 0;
1508 	mcq->vector = eq_ix;
1509 	mcq->comp = comp;
1510 	mcq->event = mlx5e_cq_error_event;
1511 	mcq->irqn = irqn;
1512 	mcq->uar = &priv->cq_uar;
1513 
1514 	for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1515 		struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1516 
1517 		cqe->op_own = 0xf1;
1518 	}
1519 
1520 	cq->priv = priv;
1521 
1522 	return (0);
1523 }
1524 
1525 static void
1526 mlx5e_destroy_cq(struct mlx5e_cq *cq)
1527 {
1528 	mlx5_wq_destroy(&cq->wq_ctrl);
1529 }
1530 
1531 static int
1532 mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix)
1533 {
1534 	struct mlx5_core_cq *mcq = &cq->mcq;
1535 	void *in;
1536 	void *cqc;
1537 	int inlen;
1538 	int irqn_not_used;
1539 	int eqn;
1540 	int err;
1541 
1542 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1543 	    sizeof(u64) * cq->wq_ctrl.buf.npages;
1544 	in = mlx5_vzalloc(inlen);
1545 	if (in == NULL)
1546 		return (-ENOMEM);
1547 
1548 	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1549 
1550 	memcpy(cqc, param->cqc, sizeof(param->cqc));
1551 
1552 	mlx5_fill_page_array(&cq->wq_ctrl.buf,
1553 	    (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas));
1554 
1555 	mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used);
1556 
1557 	MLX5_SET(cqc, cqc, c_eqn, eqn);
1558 	MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
1559 	MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1560 	    PAGE_SHIFT);
1561 	MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1562 
1563 	err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen);
1564 
1565 	kvfree(in);
1566 
1567 	if (err)
1568 		return (err);
1569 
1570 	mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock));
1571 
1572 	return (0);
1573 }
1574 
1575 static void
1576 mlx5e_disable_cq(struct mlx5e_cq *cq)
1577 {
1578 
1579 	mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq);
1580 }
1581 
1582 int
1583 mlx5e_open_cq(struct mlx5e_priv *priv,
1584     struct mlx5e_cq_param *param,
1585     struct mlx5e_cq *cq,
1586     mlx5e_cq_comp_t *comp,
1587     int eq_ix)
1588 {
1589 	int err;
1590 
1591 	err = mlx5e_create_cq(priv, param, cq, comp, eq_ix);
1592 	if (err)
1593 		return (err);
1594 
1595 	err = mlx5e_enable_cq(cq, param, eq_ix);
1596 	if (err)
1597 		goto err_destroy_cq;
1598 
1599 	return (0);
1600 
1601 err_destroy_cq:
1602 	mlx5e_destroy_cq(cq);
1603 
1604 	return (err);
1605 }
1606 
1607 void
1608 mlx5e_close_cq(struct mlx5e_cq *cq)
1609 {
1610 	mlx5e_disable_cq(cq);
1611 	mlx5e_destroy_cq(cq);
1612 }
1613 
1614 static int
1615 mlx5e_open_tx_cqs(struct mlx5e_channel *c,
1616     struct mlx5e_channel_param *cparam)
1617 {
1618 	int err;
1619 	int tc;
1620 
1621 	for (tc = 0; tc < c->num_tc; tc++) {
1622 		/* open completion queue */
1623 		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
1624 		    &mlx5e_tx_cq_comp, c->ix);
1625 		if (err)
1626 			goto err_close_tx_cqs;
1627 	}
1628 	return (0);
1629 
1630 err_close_tx_cqs:
1631 	for (tc--; tc >= 0; tc--)
1632 		mlx5e_close_cq(&c->sq[tc].cq);
1633 
1634 	return (err);
1635 }
1636 
1637 static void
1638 mlx5e_close_tx_cqs(struct mlx5e_channel *c)
1639 {
1640 	int tc;
1641 
1642 	for (tc = 0; tc < c->num_tc; tc++)
1643 		mlx5e_close_cq(&c->sq[tc].cq);
1644 }
1645 
1646 static int
1647 mlx5e_open_sqs(struct mlx5e_channel *c,
1648     struct mlx5e_channel_param *cparam)
1649 {
1650 	int err;
1651 	int tc;
1652 
1653 	for (tc = 0; tc < c->num_tc; tc++) {
1654 		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
1655 		if (err)
1656 			goto err_close_sqs;
1657 	}
1658 
1659 	return (0);
1660 
1661 err_close_sqs:
1662 	for (tc--; tc >= 0; tc--)
1663 		mlx5e_close_sq_wait(&c->sq[tc]);
1664 
1665 	return (err);
1666 }
1667 
1668 static void
1669 mlx5e_close_sqs_wait(struct mlx5e_channel *c)
1670 {
1671 	int tc;
1672 
1673 	for (tc = 0; tc < c->num_tc; tc++)
1674 		mlx5e_close_sq_wait(&c->sq[tc]);
1675 }
1676 
1677 static void
1678 mlx5e_chan_mtx_init(struct mlx5e_channel *c)
1679 {
1680 	int tc;
1681 
1682 	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);
1683 
1684 	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);
1685 
1686 	for (tc = 0; tc < c->num_tc; tc++) {
1687 		struct mlx5e_sq *sq = c->sq + tc;
1688 
1689 		mtx_init(&sq->lock, "mlx5tx",
1690 		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1691 		mtx_init(&sq->comp_lock, "mlx5comp",
1692 		    MTX_NETWORK_LOCK " TX", MTX_DEF);
1693 
1694 		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);
1695 
1696 		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;
1697 
1698 		/* ensure the TX completion event factor is not zero */
1699 		if (sq->cev_factor == 0)
1700 			sq->cev_factor = 1;
1701 	}
1702 }
1703 
1704 static void
1705 mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
1706 {
1707 	int tc;
1708 
1709 	mtx_destroy(&c->rq.mtx);
1710 
1711 	for (tc = 0; tc < c->num_tc; tc++) {
1712 		mtx_destroy(&c->sq[tc].lock);
1713 		mtx_destroy(&c->sq[tc].comp_lock);
1714 	}
1715 }
1716 
1717 static int
1718 mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
1719     struct mlx5e_channel_param *cparam,
1720     struct mlx5e_channel *volatile *cp)
1721 {
1722 	struct mlx5e_channel *c;
1723 	int err;
1724 
1725 	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
1726 	c->priv = priv;
1727 	c->ix = ix;
1728 	c->cpu = 0;
1729 	c->ifp = priv->ifp;
1730 	c->mkey_be = cpu_to_be32(priv->mr.key);
1731 	c->num_tc = priv->num_tc;
1732 
1733 	/* init mutexes */
1734 	mlx5e_chan_mtx_init(c);
1735 
1736 	/* open transmit completion queue */
1737 	err = mlx5e_open_tx_cqs(c, cparam);
1738 	if (err)
1739 		goto err_free;
1740 
1741 	/* open receive completion queue */
1742 	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
1743 	    &mlx5e_rx_cq_comp, c->ix);
1744 	if (err)
1745 		goto err_close_tx_cqs;
1746 
1747 	err = mlx5e_open_sqs(c, cparam);
1748 	if (err)
1749 		goto err_close_rx_cq;
1750 
1751 	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
1752 	if (err)
1753 		goto err_close_sqs;
1754 
1755 	/* store channel pointer */
1756 	*cp = c;
1757 
1758 	/* poll receive queue initially */
1759 	c->rq.cq.mcq.comp(&c->rq.cq.mcq);
1760 
1761 	return (0);
1762 
1763 err_close_sqs:
1764 	mlx5e_close_sqs_wait(c);
1765 
1766 err_close_rx_cq:
1767 	mlx5e_close_cq(&c->rq.cq);
1768 
1769 err_close_tx_cqs:
1770 	mlx5e_close_tx_cqs(c);
1771 
1772 err_free:
1773 	/* destroy mutexes */
1774 	mlx5e_chan_mtx_destroy(c);
1775 	free(c, M_MLX5EN);
1776 	return (err);
1777 }
1778 
1779 static void
1780 mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
1781 {
1782 	struct mlx5e_channel *c = *pp;
1783 
1784 	/* check if channel is already closed */
1785 	if (c == NULL)
1786 		return;
1787 	mlx5e_close_rq(&c->rq);
1788 }
1789 
1790 static void
1791 mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
1792 {
1793 	struct mlx5e_channel *c = *pp;
1794 
1795 	/* check if channel is already closed */
1796 	if (c == NULL)
1797 		return;
1798 	/* ensure channel pointer is no longer used */
1799 	*pp = NULL;
1800 
1801 	mlx5e_close_rq_wait(&c->rq);
1802 	mlx5e_close_sqs_wait(c);
1803 	mlx5e_close_cq(&c->rq.cq);
1804 	mlx5e_close_tx_cqs(c);
1805 	/* destroy mutexes */
1806 	mlx5e_chan_mtx_destroy(c);
1807 	free(c, M_MLX5EN);
1808 }
1809 
1810 static void
1811 mlx5e_build_rq_param(struct mlx5e_priv *priv,
1812     struct mlx5e_rq_param *param)
1813 {
1814 	void *rqc = param->rqc;
1815 	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
1816 
1817 	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
1818 	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
1819 	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
1820 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size);
1821 	MLX5_SET(wq, wq, pd, priv->pdn);
1822 
1823 	param->wq.buf_numa_node = 0;
1824 	param->wq.db_numa_node = 0;
1825 	param->wq.linear = 1;
1826 }
1827 
1828 static void
1829 mlx5e_build_sq_param(struct mlx5e_priv *priv,
1830     struct mlx5e_sq_param *param)
1831 {
1832 	void *sqc = param->sqc;
1833 	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
1834 
1835 	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
1836 	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
1837 	MLX5_SET(wq, wq, pd, priv->pdn);
1838 
1839 	param->wq.buf_numa_node = 0;
1840 	param->wq.db_numa_node = 0;
1841 	param->wq.linear = 1;
1842 }
1843 
1844 static void
1845 mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
1846     struct mlx5e_cq_param *param)
1847 {
1848 	void *cqc = param->cqc;
1849 
1850 	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
1851 }
1852 
1853 static void
1854 mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
1855     struct mlx5e_cq_param *param)
1856 {
1857 	void *cqc = param->cqc;
1858 
1859 
1860 	/*
1861 	 * TODO: The sysctl to control on/off is a boolean for now, which means
1862 	 * we only support CSUM; once HASH is implemented we'll need to address that.
1863 	 */
1864 	if (priv->params.cqe_zipping_en) {
1865 		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
1866 		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
1867 	}
1868 
1869 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
1870 	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
1871 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);
1872 
1873 	switch (priv->params.rx_cq_moderation_mode) {
1874 	case 0:
1875 		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1876 		break;
1877 	default:
1878 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1879 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1880 		else
1881 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1882 		break;
1883 	}
1884 
1885 	mlx5e_build_common_cq_param(priv, param);
1886 }
1887 
1888 static void
1889 mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
1890     struct mlx5e_cq_param *param)
1891 {
1892 	void *cqc = param->cqc;
1893 
1894 	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
1895 	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
1896 	MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts);
1897 
1898 	switch (priv->params.tx_cq_moderation_mode) {
1899 	case 0:
1900 		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1901 		break;
1902 	default:
1903 		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
1904 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
1905 		else
1906 			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
1907 		break;
1908 	}
1909 
1910 	mlx5e_build_common_cq_param(priv, param);
1911 }
1912 
1913 static void
1914 mlx5e_build_channel_param(struct mlx5e_priv *priv,
1915     struct mlx5e_channel_param *cparam)
1916 {
1917 	memset(cparam, 0, sizeof(*cparam));
1918 
1919 	mlx5e_build_rq_param(priv, &cparam->rq);
1920 	mlx5e_build_sq_param(priv, &cparam->sq);
1921 	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
1922 	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
1923 }
1924 
1925 static int
1926 mlx5e_open_channels(struct mlx5e_priv *priv)
1927 {
1928 	struct mlx5e_channel_param cparam;
1929 	void *ptr;
1930 	int err;
1931 	int i;
1932 	int j;
1933 
1934 	priv->channel = malloc(priv->params.num_channels *
1935 	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);
1936 
1937 	mlx5e_build_channel_param(priv, &cparam);
1938 	for (i = 0; i < priv->params.num_channels; i++) {
1939 		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
1940 		if (err)
1941 			goto err_close_channels;
1942 	}
1943 
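	/*
	 * Before reporting success, wait until every RQ has been
	 * populated with its minimum number of receive WQEs, so the
	 * channels are ready to receive traffic.
	 */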
1944 	for (j = 0; j < priv->params.num_channels; j++) {
1945 		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
1946 		if (err)
1947 			goto err_close_channels;
1948 	}
1949 
1950 	return (0);
1951 
1952 err_close_channels:
1953 	for (i--; i >= 0; i--) {
1954 		mlx5e_close_channel(&priv->channel[i]);
1955 		mlx5e_close_channel_wait(&priv->channel[i]);
1956 	}
1957 
1958 	/* remove "volatile" attribute from "channel" pointer */
1959 	ptr = __DECONST(void *, priv->channel);
1960 	priv->channel = NULL;
1961 
1962 	free(ptr, M_MLX5EN);
1963 
1964 	return (err);
1965 }
1966 
1967 static void
1968 mlx5e_close_channels(struct mlx5e_priv *priv)
1969 {
1970 	void *ptr;
1971 	int i;
1972 
1973 	if (priv->channel == NULL)
1974 		return;
1975 
1976 	for (i = 0; i < priv->params.num_channels; i++)
1977 		mlx5e_close_channel(&priv->channel[i]);
1978 	for (i = 0; i < priv->params.num_channels; i++)
1979 		mlx5e_close_channel_wait(&priv->channel[i]);
1980 
1981 	/* remove "volatile" attribute from "channel" pointer */
1982 	ptr = __DECONST(void *, priv->channel);
1983 	priv->channel = NULL;
1984 
1985 	free(ptr, M_MLX5EN);
1986 }
1987 
1988 static int
1989 mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq)
1990 {
1991 
1992 	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
1993 		uint8_t cq_mode;
1994 
1995 		switch (priv->params.tx_cq_moderation_mode) {
1996 		case 0:
1997 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
1998 			break;
1999 		default:
2000 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2001 			break;
2002 		}
2003 
2004 		return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq,
2005 		    priv->params.tx_cq_moderation_usec,
2006 		    priv->params.tx_cq_moderation_pkts,
2007 		    cq_mode));
2008 	}
2009 
2010 	return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq,
2011 	    priv->params.tx_cq_moderation_usec,
2012 	    priv->params.tx_cq_moderation_pkts));
2013 }
2014 
2015 static int
2016 mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq)
2017 {
2018 
2019 	if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) {
2020 		uint8_t cq_mode;
2021 		int retval;
2022 
2023 		switch (priv->params.rx_cq_moderation_mode) {
2024 		case 0:
2025 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
2026 			break;
2027 		default:
2028 			cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE;
2029 			break;
2030 		}
2031 
2032 		retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq,
2033 		    priv->params.rx_cq_moderation_usec,
2034 		    priv->params.rx_cq_moderation_pkts,
2035 		    cq_mode);
2036 
2037 		return (retval);
2038 	}
2039 
2040 	return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq,
2041 	    priv->params.rx_cq_moderation_usec,
2042 	    priv->params.rx_cq_moderation_pkts));
2043 }
2044 
2045 static int
2046 mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c)
2047 {
2048 	int err;
2049 	int i;
2050 
2051 	if (c == NULL)
2052 		return (EINVAL);
2053 
2054 	err = mlx5e_refresh_rq_params(priv, &c->rq);
2055 	if (err)
2056 		goto done;
2057 
2058 	for (i = 0; i != c->num_tc; i++) {
2059 		err = mlx5e_refresh_sq_params(priv, &c->sq[i]);
2060 		if (err)
2061 			goto done;
2062 	}
2063 done:
2064 	return (err);
2065 }
2066 
2067 int
2068 mlx5e_refresh_channel_params(struct mlx5e_priv *priv)
2069 {
2070 	int i;
2071 
2072 	if (priv->channel == NULL)
2073 		return (EINVAL);
2074 
2075 	for (i = 0; i < priv->params.num_channels; i++) {
2076 		int err;
2077 
2078 		err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]);
2079 		if (err)
2080 			return (err);
2081 	}
2082 	return (0);
2083 }
2084 
2085 static int
2086 mlx5e_open_tis(struct mlx5e_priv *priv, int tc)
2087 {
2088 	struct mlx5_core_dev *mdev = priv->mdev;
2089 	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
2090 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
2091 
2092 	memset(in, 0, sizeof(in));
2093 
2094 	MLX5_SET(tisc, tisc, prio, tc);
2095 	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);
2096 
2097 	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
2098 }
2099 
2100 static void
2101 mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
2102 {
2103 	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
2104 }
2105 
2106 static int
2107 mlx5e_open_tises(struct mlx5e_priv *priv)
2108 {
2109 	int num_tc = priv->num_tc;
2110 	int err;
2111 	int tc;
2112 
2113 	for (tc = 0; tc < num_tc; tc++) {
2114 		err = mlx5e_open_tis(priv, tc);
2115 		if (err)
2116 			goto err_close_tises;
2117 	}
2118 
2119 	return (0);
2120 
2121 err_close_tises:
2122 	for (tc--; tc >= 0; tc--)
2123 		mlx5e_close_tis(priv, tc);
2124 
2125 	return (err);
2126 }
2127 
2128 static void
2129 mlx5e_close_tises(struct mlx5e_priv *priv)
2130 {
2131 	int num_tc = priv->num_tc;
2132 	int tc;
2133 
2134 	for (tc = 0; tc < num_tc; tc++)
2135 		mlx5e_close_tis(priv, tc);
2136 }
2137 
2138 static int
2139 mlx5e_open_rqt(struct mlx5e_priv *priv)
2140 {
2141 	struct mlx5_core_dev *mdev = priv->mdev;
2142 	u32 *in;
2143 	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
2144 	void *rqtc;
2145 	int inlen;
2146 	int err;
2147 	int sz;
2148 	int i;
2149 
2150 	sz = 1 << priv->params.rx_hash_log_tbl_sz;
2151 
2152 	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
2153 	in = mlx5_vzalloc(inlen);
2154 	if (in == NULL)
2155 		return (-ENOMEM);
2156 	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
2157 
2158 	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
2159 	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
2160 
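	/*
	 * Fill the RQT indirection table: with the RSS kernel option the
	 * entries follow the kernel bucket-to-CPU mapping, otherwise the
	 * channel RQs are simply used in cyclic order.
	 */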
2161 	for (i = 0; i < sz; i++) {
2162 		int ix;
2163 #ifdef RSS
2164 		ix = rss_get_indirection_to_bucket(i);
2165 #else
2166 		ix = i;
2167 #endif
2168 		/* ensure we don't overflow */
2169 		ix %= priv->params.num_channels;
2170 		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
2171 	}
2172 
2173 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
2174 
2175 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
2176 	if (!err)
2177 		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);
2178 
2179 	kvfree(in);
2180 
2181 	return (err);
2182 }
2183 
2184 static void
2185 mlx5e_close_rqt(struct mlx5e_priv *priv)
2186 {
2187 	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
2188 	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
2189 
2190 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
2191 	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);
2192 
2193 	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
2194 }
2195 
2196 static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt)
2198 {
2199 	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
2200 	__be32 *hkey;
2201 
2202 	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);
2203 
2204 #define	ROUGH_MAX_L2_L3_HDR_SZ 256
2205 
2206 #define	MLX5_HASH_IP     (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2207 			  MLX5_HASH_FIELD_SEL_DST_IP)
2208 
2209 #define	MLX5_HASH_ALL    (MLX5_HASH_FIELD_SEL_SRC_IP   |\
2210 			  MLX5_HASH_FIELD_SEL_DST_IP   |\
2211 			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
2212 			  MLX5_HASH_FIELD_SEL_L4_DPORT)
2213 
2214 #define	MLX5_HASH_IP_IPSEC_SPI	(MLX5_HASH_FIELD_SEL_SRC_IP   |\
2215 				 MLX5_HASH_FIELD_SEL_DST_IP   |\
2216 				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)
2217 
2218 	if (priv->params.hw_lro_en) {
2219 		MLX5_SET(tirc, tirc, lro_enable_mask,
2220 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
2221 		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
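		/*
		 * The shift by 8 below suggests the maximum LRO message
		 * size field is programmed in units of 256 bytes; a rough
		 * upper bound for the L2/L3 header space is subtracted
		 * from the LRO WQE size first.
		 */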
2222 		MLX5_SET(tirc, tirc, lro_max_msg_sz,
2223 		    (priv->params.lro_wqe_sz -
2224 		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
2225 		/* TODO: add the option to choose timer value dynamically */
2226 		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
2227 		    MLX5_CAP_ETH(priv->mdev,
2228 		    lro_timer_supported_periods[2]));
2229 	}
2230 
2231 	/* setup parameters for hashing TIR type, if any */
2232 	switch (tt) {
2233 	case MLX5E_TT_ANY:
2234 		MLX5_SET(tirc, tirc, disp_type,
2235 		    MLX5_TIRC_DISP_TYPE_DIRECT);
2236 		MLX5_SET(tirc, tirc, inline_rqn,
2237 		    priv->channel[0]->rq.rqn);
2238 		break;
2239 	default:
2240 		MLX5_SET(tirc, tirc, disp_type,
2241 		    MLX5_TIRC_DISP_TYPE_INDIRECT);
2242 		MLX5_SET(tirc, tirc, indirect_table,
2243 		    priv->rqtn);
2244 		MLX5_SET(tirc, tirc, rx_hash_fn,
2245 		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
2246 		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
2247 #ifdef RSS
		/*
		 * The FreeBSD RSS implementation does not currently
		 * support symmetric Toeplitz hashes:
		 */
2252 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
2253 		rss_getkey((uint8_t *)hkey);
2254 #else
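		/*
		 * Without the RSS option, use a fixed Toeplitz key
		 * together with the device's symmetric hashing option so
		 * that both directions of a connection hash to the same
		 * receive queue.
		 */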
2255 		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
2256 		hkey[0] = cpu_to_be32(0xD181C62C);
2257 		hkey[1] = cpu_to_be32(0xF7F4DB5B);
2258 		hkey[2] = cpu_to_be32(0x1983A2FC);
2259 		hkey[3] = cpu_to_be32(0x943E1ADB);
2260 		hkey[4] = cpu_to_be32(0xD9389E6B);
2261 		hkey[5] = cpu_to_be32(0xD1039C2C);
2262 		hkey[6] = cpu_to_be32(0xA74499AD);
2263 		hkey[7] = cpu_to_be32(0x593D56D9);
2264 		hkey[8] = cpu_to_be32(0xF3253C06);
2265 		hkey[9] = cpu_to_be32(0x2ADC1FFC);
2266 #endif
2267 		break;
2268 	}
2269 
2270 	switch (tt) {
2271 	case MLX5E_TT_IPV4_TCP:
2272 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2273 		    MLX5_L3_PROT_TYPE_IPV4);
2274 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2275 		    MLX5_L4_PROT_TYPE_TCP);
2276 #ifdef RSS
2277 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
2278 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2279 			    MLX5_HASH_IP);
2280 		} else
2281 #endif
2282 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2283 		    MLX5_HASH_ALL);
2284 		break;
2285 
2286 	case MLX5E_TT_IPV6_TCP:
2287 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2288 		    MLX5_L3_PROT_TYPE_IPV6);
2289 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2290 		    MLX5_L4_PROT_TYPE_TCP);
2291 #ifdef RSS
2292 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
2293 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2294 			    MLX5_HASH_IP);
2295 		} else
2296 #endif
2297 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2298 		    MLX5_HASH_ALL);
2299 		break;
2300 
2301 	case MLX5E_TT_IPV4_UDP:
2302 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2303 		    MLX5_L3_PROT_TYPE_IPV4);
2304 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2305 		    MLX5_L4_PROT_TYPE_UDP);
2306 #ifdef RSS
2307 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
2308 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2309 			    MLX5_HASH_IP);
2310 		} else
2311 #endif
2312 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2313 		    MLX5_HASH_ALL);
2314 		break;
2315 
2316 	case MLX5E_TT_IPV6_UDP:
2317 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2318 		    MLX5_L3_PROT_TYPE_IPV6);
2319 		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
2320 		    MLX5_L4_PROT_TYPE_UDP);
2321 #ifdef RSS
2322 		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
2323 			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2324 			    MLX5_HASH_IP);
2325 		} else
2326 #endif
2327 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2328 		    MLX5_HASH_ALL);
2329 		break;
2330 
2331 	case MLX5E_TT_IPV4_IPSEC_AH:
2332 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2333 		    MLX5_L3_PROT_TYPE_IPV4);
2334 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2335 		    MLX5_HASH_IP_IPSEC_SPI);
2336 		break;
2337 
2338 	case MLX5E_TT_IPV6_IPSEC_AH:
2339 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2340 		    MLX5_L3_PROT_TYPE_IPV6);
2341 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2342 		    MLX5_HASH_IP_IPSEC_SPI);
2343 		break;
2344 
2345 	case MLX5E_TT_IPV4_IPSEC_ESP:
2346 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2347 		    MLX5_L3_PROT_TYPE_IPV4);
2348 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2349 		    MLX5_HASH_IP_IPSEC_SPI);
2350 		break;
2351 
2352 	case MLX5E_TT_IPV6_IPSEC_ESP:
2353 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2354 		    MLX5_L3_PROT_TYPE_IPV6);
2355 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2356 		    MLX5_HASH_IP_IPSEC_SPI);
2357 		break;
2358 
2359 	case MLX5E_TT_IPV4:
2360 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2361 		    MLX5_L3_PROT_TYPE_IPV4);
2362 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2363 		    MLX5_HASH_IP);
2364 		break;
2365 
2366 	case MLX5E_TT_IPV6:
2367 		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
2368 		    MLX5_L3_PROT_TYPE_IPV6);
2369 		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
2370 		    MLX5_HASH_IP);
2371 		break;
2372 
2373 	default:
2374 		break;
2375 	}
2376 }
2377 
2378 static int
2379 mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
2380 {
2381 	struct mlx5_core_dev *mdev = priv->mdev;
2382 	u32 *in;
2383 	void *tirc;
2384 	int inlen;
2385 	int err;
2386 
2387 	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
2388 	in = mlx5_vzalloc(inlen);
2389 	if (in == NULL)
2390 		return (-ENOMEM);
2391 	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);
2392 
2393 	mlx5e_build_tir_ctx(priv, tirc, tt);
2394 
2395 	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);
2396 
2397 	kvfree(in);
2398 
2399 	return (err);
2400 }
2401 
2402 static void
2403 mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
2404 {
2405 	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
2406 }
2407 
2408 static int
2409 mlx5e_open_tirs(struct mlx5e_priv *priv)
2410 {
2411 	int err;
2412 	int i;
2413 
2414 	for (i = 0; i < MLX5E_NUM_TT; i++) {
2415 		err = mlx5e_open_tir(priv, i);
2416 		if (err)
2417 			goto err_close_tirs;
2418 	}
2419 
2420 	return (0);
2421 
2422 err_close_tirs:
2423 	for (i--; i >= 0; i--)
2424 		mlx5e_close_tir(priv, i);
2425 
2426 	return (err);
2427 }
2428 
2429 static void
2430 mlx5e_close_tirs(struct mlx5e_priv *priv)
2431 {
2432 	int i;
2433 
2434 	for (i = 0; i < MLX5E_NUM_TT; i++)
2435 		mlx5e_close_tir(priv, i);
2436 }
2437 
2438 /*
2439  * SW MTU does not include headers,
2440  * HW MTU includes all headers and checksums.
2441  */
2442 static int
2443 mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu)
2444 {
2445 	struct mlx5e_priv *priv = ifp->if_softc;
2446 	struct mlx5_core_dev *mdev = priv->mdev;
2447 	int hw_mtu;
2448 	int err;
2449 
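	/*
	 * For example, a conventional 1500 byte SW MTU translates into a
	 * larger HW MTU, because the HW value also accounts for the L2
	 * headers and the frame checksum.
	 */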
2450 	hw_mtu = MLX5E_SW2HW_MTU(sw_mtu);
2451 
2452 	err = mlx5_set_port_mtu(mdev, hw_mtu);
2453 	if (err) {
2454 		if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n",
2455 		    __func__, sw_mtu, err);
2456 		return (err);
2457 	}
2458 
2459 	/* Update vport context MTU */
2460 	err = mlx5_set_vport_mtu(mdev, hw_mtu);
2461 	if (err) {
2462 		if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n",
2463 		    __func__, err);
2464 	}
2465 
2466 	ifp->if_mtu = sw_mtu;
2467 
2468 	err = mlx5_query_vport_mtu(mdev, &hw_mtu);
2469 	if (err || !hw_mtu) {
2470 		/* fallback to port oper mtu */
2471 		err = mlx5_query_port_oper_mtu(mdev, &hw_mtu);
2472 	}
	if (err) {
		if_printf(ifp, "Querying port MTU after setting the new "
		    "MTU value failed\n");
		return (err);
	} else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) {
		err = -E2BIG;
		if_printf(ifp, "Port MTU %d is smaller than "
		    "ifp mtu %d\n", hw_mtu, sw_mtu);
	} else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) {
		err = -EINVAL;
		if_printf(ifp, "Port MTU %d is bigger than "
		    "ifp mtu %d\n", hw_mtu, sw_mtu);
	}
2486 	priv->params_ethtool.hw_mtu = hw_mtu;
2487 
2488 	return (err);
2489 }
2490 
2491 int
2492 mlx5e_open_locked(struct ifnet *ifp)
2493 {
2494 	struct mlx5e_priv *priv = ifp->if_softc;
2495 	int err;
2496 	u16 set_id;
2497 
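	/*
	 * Hardware resources are opened in dependency order: TISes and a
	 * queue counter first, then the channels, the RQ table, the
	 * TIRs, and finally the flow table and VLAN rules. The error
	 * paths below unwind them in reverse order.
	 */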
2498 	/* check if already opened */
2499 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2500 		return (0);
2501 
2502 #ifdef RSS
2503 	if (rss_getnumbuckets() > priv->params.num_channels) {
		if_printf(ifp, "NOTE: There are more RSS buckets (%u) than "
		    "channels (%u) available\n", rss_getnumbuckets(),
		    priv->params.num_channels);
2507 	}
2508 #endif
2509 	err = mlx5e_open_tises(priv);
2510 	if (err) {
2511 		if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n",
2512 		    __func__, err);
2513 		return (err);
2514 	}
2515 	err = mlx5_vport_alloc_q_counter(priv->mdev,
2516 	    MLX5_INTERFACE_PROTOCOL_ETH, &set_id);
2517 	if (err) {
2518 		if_printf(priv->ifp,
2519 		    "%s: mlx5_vport_alloc_q_counter failed: %d\n",
2520 		    __func__, err);
2521 		goto err_close_tises;
2522 	}
2523 	/* store counter set ID */
2524 	priv->counter_set_id = set_id;
2525 
2526 	err = mlx5e_open_channels(priv);
2527 	if (err) {
2528 		if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n",
2529 		    __func__, err);
2530 		goto err_dalloc_q_counter;
2531 	}
2532 	err = mlx5e_open_rqt(priv);
2533 	if (err) {
2534 		if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n",
2535 		    __func__, err);
2536 		goto err_close_channels;
2537 	}
2538 	err = mlx5e_open_tirs(priv);
2539 	if (err) {
		if_printf(ifp, "%s: mlx5e_open_tirs failed, %d\n",
2541 		    __func__, err);
		goto err_close_rqt;
2543 	}
2544 	err = mlx5e_open_flow_table(priv);
2545 	if (err) {
2546 		if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n",
2547 		    __func__, err);
2548 		goto err_close_tirs;
2549 	}
2550 	err = mlx5e_add_all_vlan_rules(priv);
2551 	if (err) {
2552 		if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n",
2553 		    __func__, err);
2554 		goto err_close_flow_table;
2555 	}
2556 	set_bit(MLX5E_STATE_OPENED, &priv->state);
2557 
2558 	mlx5e_update_carrier(priv);
2559 	mlx5e_set_rx_mode_core(priv);
2560 
2561 	return (0);
2562 
2563 err_close_flow_table:
2564 	mlx5e_close_flow_table(priv);
2565 
2566 err_close_tirs:
2567 	mlx5e_close_tirs(priv);
2568 
err_close_rqt:
2570 	mlx5e_close_rqt(priv);
2571 
2572 err_close_channels:
2573 	mlx5e_close_channels(priv);
2574 
2575 err_dalloc_q_counter:
2576 	mlx5_vport_dealloc_q_counter(priv->mdev,
2577 	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2578 
2579 err_close_tises:
2580 	mlx5e_close_tises(priv);
2581 
2582 	return (err);
2583 }
2584 
2585 static void
2586 mlx5e_open(void *arg)
2587 {
2588 	struct mlx5e_priv *priv = arg;
2589 
2590 	PRIV_LOCK(priv);
2591 	if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP))
2592 		if_printf(priv->ifp,
2593 		    "%s: Setting port status to up failed\n",
2594 		    __func__);
2595 
2596 	mlx5e_open_locked(priv->ifp);
2597 	priv->ifp->if_drv_flags |= IFF_DRV_RUNNING;
2598 	PRIV_UNLOCK(priv);
2599 }
2600 
2601 int
2602 mlx5e_close_locked(struct ifnet *ifp)
2603 {
2604 	struct mlx5e_priv *priv = ifp->if_softc;
2605 
2606 	/* check if already closed */
2607 	if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2608 		return (0);
2609 
2610 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
2611 
2612 	mlx5e_set_rx_mode_core(priv);
2613 	mlx5e_del_all_vlan_rules(priv);
2614 	if_link_state_change(priv->ifp, LINK_STATE_DOWN);
2615 	mlx5e_close_flow_table(priv);
2616 	mlx5e_close_tirs(priv);
2617 	mlx5e_close_rqt(priv);
2618 	mlx5e_close_channels(priv);
2619 	mlx5_vport_dealloc_q_counter(priv->mdev,
2620 	    MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id);
2621 	mlx5e_close_tises(priv);
2622 
2623 	return (0);
2624 }
2625 
2626 #if (__FreeBSD_version >= 1100000)
2627 static uint64_t
2628 mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
2629 {
2630 	struct mlx5e_priv *priv = ifp->if_softc;
2631 	u64 retval;
2632 
2633 	/* PRIV_LOCK(priv); XXX not allowed */
2634 	switch (cnt) {
2635 	case IFCOUNTER_IPACKETS:
2636 		retval = priv->stats.vport.rx_packets;
2637 		break;
2638 	case IFCOUNTER_IERRORS:
2639 		retval = priv->stats.vport.rx_error_packets +
2640 		    priv->stats.pport.alignment_err +
2641 		    priv->stats.pport.check_seq_err +
2642 		    priv->stats.pport.crc_align_errors +
2643 		    priv->stats.pport.in_range_len_errors +
2644 		    priv->stats.pport.jabbers +
2645 		    priv->stats.pport.out_of_range_len +
2646 		    priv->stats.pport.oversize_pkts +
2647 		    priv->stats.pport.symbol_err +
2648 		    priv->stats.pport.too_long_errors +
2649 		    priv->stats.pport.undersize_pkts +
2650 		    priv->stats.pport.unsupported_op_rx;
2651 		break;
2652 	case IFCOUNTER_IQDROPS:
2653 		retval = priv->stats.vport.rx_out_of_buffer +
2654 		    priv->stats.pport.drop_events;
2655 		break;
2656 	case IFCOUNTER_OPACKETS:
2657 		retval = priv->stats.vport.tx_packets;
2658 		break;
2659 	case IFCOUNTER_OERRORS:
2660 		retval = priv->stats.vport.tx_error_packets;
2661 		break;
2662 	case IFCOUNTER_IBYTES:
2663 		retval = priv->stats.vport.rx_bytes;
2664 		break;
2665 	case IFCOUNTER_OBYTES:
2666 		retval = priv->stats.vport.tx_bytes;
2667 		break;
2668 	case IFCOUNTER_IMCASTS:
2669 		retval = priv->stats.vport.rx_multicast_packets;
2670 		break;
2671 	case IFCOUNTER_OMCASTS:
2672 		retval = priv->stats.vport.tx_multicast_packets;
2673 		break;
2674 	case IFCOUNTER_OQDROPS:
2675 		retval = priv->stats.vport.tx_queue_dropped;
2676 		break;
2677 	case IFCOUNTER_COLLISIONS:
2678 		retval = priv->stats.pport.collisions;
2679 		break;
2680 	default:
2681 		retval = if_get_counter_default(ifp, cnt);
2682 		break;
2683 	}
2684 	/* PRIV_UNLOCK(priv); XXX not allowed */
2685 	return (retval);
2686 }
2687 #endif
2688 
2689 static void
2690 mlx5e_set_rx_mode(struct ifnet *ifp)
2691 {
2692 	struct mlx5e_priv *priv = ifp->if_softc;
2693 
2694 	queue_work(priv->wq, &priv->set_rx_mode_work);
2695 }
2696 
2697 static int
2698 mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
2699 {
2700 	struct mlx5e_priv *priv;
2701 	struct ifreq *ifr;
2702 	struct ifi2creq i2c;
2703 	int error = 0;
2704 	int mask = 0;
2705 	int size_read = 0;
2706 	int module_status;
2707 	int module_num;
2708 	int max_mtu;
2709 	uint8_t read_addr;
2710 
2711 	priv = ifp->if_softc;
2712 
2713 	/* check if detaching */
2714 	if (priv == NULL || priv->gone != 0)
2715 		return (ENXIO);
2716 
2717 	switch (command) {
2718 	case SIOCSIFMTU:
2719 		ifr = (struct ifreq *)data;
2720 
2721 		PRIV_LOCK(priv);
2722 		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);
2723 
2724 		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
2725 		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
2726 			int was_opened;
2727 
2728 			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2729 			if (was_opened)
2730 				mlx5e_close_locked(ifp);
2731 
2732 			/* set new MTU */
2733 			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);
2734 
2735 			if (was_opened)
2736 				mlx5e_open_locked(ifp);
2737 		} else {
2738 			error = EINVAL;
2739 			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
2740 			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
2741 		}
2742 		PRIV_UNLOCK(priv);
2743 		break;
2744 	case SIOCSIFFLAGS:
2745 		if ((ifp->if_flags & IFF_UP) &&
2746 		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
2747 			mlx5e_set_rx_mode(ifp);
2748 			break;
2749 		}
2750 		PRIV_LOCK(priv);
2751 		if (ifp->if_flags & IFF_UP) {
2752 			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2753 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
2754 					mlx5e_open_locked(ifp);
2755 				ifp->if_drv_flags |= IFF_DRV_RUNNING;
2756 				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
2757 			}
2758 		} else {
2759 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2760 				mlx5_set_port_status(priv->mdev,
2761 				    MLX5_PORT_DOWN);
2762 				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
2763 					mlx5e_close_locked(ifp);
2764 				mlx5e_update_carrier(priv);
2765 				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2766 			}
2767 		}
2768 		PRIV_UNLOCK(priv);
2769 		break;
2770 	case SIOCADDMULTI:
2771 	case SIOCDELMULTI:
2772 		mlx5e_set_rx_mode(ifp);
2773 		break;
2774 	case SIOCSIFMEDIA:
2775 	case SIOCGIFMEDIA:
2776 	case SIOCGIFXMEDIA:
2777 		ifr = (struct ifreq *)data;
2778 		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
2779 		break;
2780 	case SIOCSIFCAP:
2781 		ifr = (struct ifreq *)data;
2782 		PRIV_LOCK(priv);
2783 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2784 
2785 		if (mask & IFCAP_TXCSUM) {
2786 			ifp->if_capenable ^= IFCAP_TXCSUM;
2787 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
2788 
2789 			if (IFCAP_TSO4 & ifp->if_capenable &&
2790 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2791 				ifp->if_capenable &= ~IFCAP_TSO4;
2792 				ifp->if_hwassist &= ~CSUM_IP_TSO;
2793 				if_printf(ifp,
2794 				    "tso4 disabled due to -txcsum.\n");
2795 			}
2796 		}
2797 		if (mask & IFCAP_TXCSUM_IPV6) {
2798 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2799 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
2800 
2801 			if (IFCAP_TSO6 & ifp->if_capenable &&
2802 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2803 				ifp->if_capenable &= ~IFCAP_TSO6;
2804 				ifp->if_hwassist &= ~CSUM_IP6_TSO;
2805 				if_printf(ifp,
2806 				    "tso6 disabled due to -txcsum6.\n");
2807 			}
2808 		}
2809 		if (mask & IFCAP_RXCSUM)
2810 			ifp->if_capenable ^= IFCAP_RXCSUM;
2811 		if (mask & IFCAP_RXCSUM_IPV6)
2812 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2813 		if (mask & IFCAP_TSO4) {
2814 			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
2815 			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
2816 				if_printf(ifp, "enable txcsum first.\n");
2817 				error = EAGAIN;
2818 				goto out;
2819 			}
2820 			ifp->if_capenable ^= IFCAP_TSO4;
2821 			ifp->if_hwassist ^= CSUM_IP_TSO;
2822 		}
2823 		if (mask & IFCAP_TSO6) {
2824 			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2825 			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2826 				if_printf(ifp, "enable txcsum6 first.\n");
2827 				error = EAGAIN;
2828 				goto out;
2829 			}
2830 			ifp->if_capenable ^= IFCAP_TSO6;
2831 			ifp->if_hwassist ^= CSUM_IP6_TSO;
2832 		}
2833 		if (mask & IFCAP_VLAN_HWFILTER) {
2834 			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2835 				mlx5e_disable_vlan_filter(priv);
2836 			else
2837 				mlx5e_enable_vlan_filter(priv);
2838 
2839 			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
2840 		}
2841 		if (mask & IFCAP_VLAN_HWTAGGING)
2842 			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2843 		if (mask & IFCAP_WOL_MAGIC)
2844 			ifp->if_capenable ^= IFCAP_WOL_MAGIC;
2845 
2846 		VLAN_CAPABILITIES(ifp);
		/* turning off LRO also turns off HW LRO, if it is on */
2848 		if (mask & IFCAP_LRO) {
2849 			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
2850 			bool need_restart = false;
2851 
2852 			ifp->if_capenable ^= IFCAP_LRO;
2853 			if (!(ifp->if_capenable & IFCAP_LRO)) {
2854 				if (priv->params.hw_lro_en) {
2855 					priv->params.hw_lro_en = false;
2856 					need_restart = true;
2857 					/* Not sure this is the correct way */
2858 					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
2859 				}
2860 			}
2861 			if (was_opened && need_restart) {
2862 				mlx5e_close_locked(ifp);
2863 				mlx5e_open_locked(ifp);
2864 			}
2865 		}
2866 		if (mask & IFCAP_HWRXTSTMP) {
2867 			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
2868 			if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
2869 				if (priv->clbr_done == 0)
2870 					mlx5e_reset_calibration_callout(priv);
2871 			} else {
2872 				callout_drain(&priv->tstmp_clbr);
2873 				priv->clbr_done = 0;
2874 			}
2875 		}
2876 out:
2877 		PRIV_UNLOCK(priv);
2878 		break;
2879 
2880 	case SIOCGI2C:
2881 		ifr = (struct ifreq *)data;
2882 
2883 		/*
2884 		 * Copy from the user-space address ifr_data to the
2885 		 * kernel-space address i2c
2886 		 */
2887 		error = copyin(ifr->ifr_data, &i2c, sizeof(i2c));
2888 		if (error)
2889 			break;
2890 
2891 		if (i2c.len > sizeof(i2c.data)) {
2892 			error = EINVAL;
2893 			break;
2894 		}
2895 
2896 		PRIV_LOCK(priv);
2897 		/* Get module_num which is required for the query_eeprom */
2898 		error = mlx5_query_module_num(priv->mdev, &module_num);
2899 		if (error) {
2900 			if_printf(ifp, "Query module num failed, eeprom "
2901 			    "reading is not supported\n");
2902 			error = EINVAL;
2903 			goto err_i2c;
2904 		}
2905 		/* Check if module is present before doing an access */
2906 		module_status = mlx5_query_module_status(priv->mdev, module_num);
2907 		if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED &&
2908 		    module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) {
2909 			error = EINVAL;
2910 			goto err_i2c;
2911 		}
2912 		/*
		 * Currently 0xA0 and 0xA2 are the only addresses permitted.
2914 		 * The internal conversion is as follows:
2915 		 */
2916 		if (i2c.dev_addr == 0xA0)
2917 			read_addr = MLX5E_I2C_ADDR_LOW;
2918 		else if (i2c.dev_addr == 0xA2)
2919 			read_addr = MLX5E_I2C_ADDR_HIGH;
2920 		else {
2921 			if_printf(ifp, "Query eeprom failed, "
2922 			    "Invalid Address: %X\n", i2c.dev_addr);
2923 			error = EINVAL;
2924 			goto err_i2c;
2925 		}
2926 		error = mlx5_query_eeprom(priv->mdev,
2927 		    read_addr, MLX5E_EEPROM_LOW_PAGE,
2928 		    (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num,
2929 		    (uint32_t *)i2c.data, &size_read);
2930 		if (error) {
2931 			if_printf(ifp, "Query eeprom failed, eeprom "
2932 			    "reading is not supported\n");
2933 			error = EINVAL;
2934 			goto err_i2c;
2935 		}
2936 
2937 		if (i2c.len > MLX5_EEPROM_MAX_BYTES) {
2938 			error = mlx5_query_eeprom(priv->mdev,
2939 			    read_addr, MLX5E_EEPROM_LOW_PAGE,
2940 			    (uint32_t)(i2c.offset + size_read),
2941 			    (uint32_t)(i2c.len - size_read), module_num,
2942 			    (uint32_t *)(i2c.data + size_read), &size_read);
2943 		}
2944 		if (error) {
2945 			if_printf(ifp, "Query eeprom failed, eeprom "
2946 			    "reading is not supported\n");
2947 			error = EINVAL;
2948 			goto err_i2c;
2949 		}
2950 
2951 		error = copyout(&i2c, ifr->ifr_data, sizeof(i2c));
2952 err_i2c:
2953 		PRIV_UNLOCK(priv);
2954 		break;
2955 
2956 	default:
2957 		error = ether_ioctl(ifp, command, data);
2958 		break;
2959 	}
2960 	return (error);
2961 }
2962 
2963 static int
2964 mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
2965 {
	/*
	 * TODO: Uncomment once FW really sets all these bits:
	 * if (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
	 *     !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
	 *     !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD))
	 *	return (-ENOTSUPP);
	 */

	/* TODO: add more must-have features */
2975 
2976 	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
2977 		return (-ENODEV);
2978 
2979 	return (0);
2980 }
2981 
2982 static void
2983 mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
2984     struct mlx5e_priv *priv,
2985     int num_comp_vectors)
2986 {
2987 	/*
2988 	 * TODO: Consider link speed for setting "log_sq_size",
2989 	 * "log_rq_size" and "cq_moderation_xxx":
2990 	 */
2991 	priv->params.log_sq_size =
2992 	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
2993 	priv->params.log_rq_size =
2994 	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
2995 	priv->params.rx_cq_moderation_usec =
2996 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
2997 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
2998 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
2999 	priv->params.rx_cq_moderation_mode =
3000 	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
3001 	priv->params.rx_cq_moderation_pkts =
3002 	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
3003 	priv->params.tx_cq_moderation_usec =
3004 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
3005 	priv->params.tx_cq_moderation_pkts =
3006 	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
3007 	priv->params.min_rx_wqes =
3008 	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
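	/*
	 * Size the RX hash table so that it can address every completion
	 * vector; its log2 size is the larger of order_base_2() of the
	 * vector count and the compiled-in default.
	 */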
3009 	priv->params.rx_hash_log_tbl_sz =
3010 	    (order_base_2(num_comp_vectors) >
3011 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
3012 	    order_base_2(num_comp_vectors) :
3013 	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
3014 	priv->params.num_tc = 1;
3015 	priv->params.default_vlan_prio = 0;
3016 	priv->counter_set_id = -1;
3017 
	/*
	 * HW LRO is currently defaulted to off. Once that is no longer
	 * the case we will consider the HW capability:
	 * "!!MLX5_CAP_ETH(mdev, lro_cap)"
	 */
3022 	priv->params.hw_lro_en = false;
3023 	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
3024 
3025 	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);
3026 
3027 	priv->mdev = mdev;
3028 	priv->params.num_channels = num_comp_vectors;
3029 	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
3030 	priv->queue_mapping_channel_mask =
3031 	    roundup_pow_of_two(num_comp_vectors) - 1;
3032 	priv->num_tc = priv->params.num_tc;
3033 	priv->default_vlan_prio = priv->params.default_vlan_prio;
3034 
3035 	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
3036 	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
3037 	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
3038 }
3039 
3040 static int
3041 mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
3042 		  struct mlx5_core_mr *mkey)
3043 {
3044 	struct ifnet *ifp = priv->ifp;
3045 	struct mlx5_core_dev *mdev = priv->mdev;
3046 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
3047 	void *mkc;
3048 	u32 *in;
3049 	int err;
3050 
3051 	in = mlx5_vzalloc(inlen);
3052 	if (in == NULL) {
3053 		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
3054 		return (-ENOMEM);
3055 	}
3056 
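	/*
	 * Build a physical-address (PA) memory key with local read and
	 * write access spanning the whole address space ("length64");
	 * the qpn value 0xffffff is the reserved value allowing use by
	 * any QP.
	 */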
3057 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
3058 	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
3059 	MLX5_SET(mkc, mkc, lw, 1);
3060 	MLX5_SET(mkc, mkc, lr, 1);
3061 
3062 	MLX5_SET(mkc, mkc, pd, pdn);
3063 	MLX5_SET(mkc, mkc, length64, 1);
3064 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
3065 
3066 	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
3067 	if (err)
3068 		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
3069 		    __func__, err);
3070 
3071 	kvfree(in);
3072 	return (err);
3073 }
3074 
3075 static const char *mlx5e_vport_stats_desc[] = {
3076 	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
3077 };
3078 
3079 static const char *mlx5e_pport_stats_desc[] = {
3080 	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
3081 };
3082 
3083 static void
3084 mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
3085 {
3086 	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
3087 	sx_init(&priv->state_lock, "mlx5state");
3088 	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
3089 	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
3090 }
3091 
3092 static void
3093 mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv)
3094 {
3095 	mtx_destroy(&priv->async_events_mtx);
3096 	sx_destroy(&priv->state_lock);
3097 }
3098 
3099 static int
3100 sysctl_firmware(SYSCTL_HANDLER_ARGS)
3101 {
	/*
	 * The string format is "%d.%d.%d".
	 * fw_rev_{maj,min,sub} each return a u16; 2^16 = 65536, so at
	 * most 5 characters are needed to store each field. Adding the
	 * two "." separators and the terminating NUL gives at most 18
	 * (5*3 + 3) characters.
	 */
3109 	char fw[18];
3110 	struct mlx5e_priv *priv = arg1;
3111 	int error;
3112 
3113 	snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev),
3114 	    fw_rev_sub(priv->mdev));
3115 	error = sysctl_handle_string(oidp, fw, sizeof(fw), req);
3116 	return (error);
3117 }
3118 
3119 static void
3120 mlx5e_disable_tx_dma(struct mlx5e_channel *ch)
3121 {
3122 	int i;
3123 
3124 	for (i = 0; i < ch->num_tc; i++)
3125 		mlx5e_drain_sq(&ch->sq[i]);
3126 }
3127 
3128 static void
3129 mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq)
3130 {
3131 
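	/*
	 * Post a NOP doorbell so that the hardware doorbell record is
	 * brought back in sync with the software state before the SQ is
	 * moved from the RST to the RDY state.
	 */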
3132 	sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP);
3133 	sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8);
3134 	mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0);
3135 	sq->doorbell.d64 = 0;
3136 }
3137 
3138 void
3139 mlx5e_resume_sq(struct mlx5e_sq *sq)
3140 {
3141 	int err;
3142 
3143 	/* check if already enabled */
3144 	if (sq->stopped == 0)
3145 		return;
3146 
3147 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR,
3148 	    MLX5_SQC_STATE_RST);
3149 	if (err != 0) {
3150 		if_printf(sq->ifp,
3151 		    "mlx5e_modify_sq() from ERR to RST failed: %d\n", err);
3152 	}
3153 
3154 	sq->cc = 0;
3155 	sq->pc = 0;
3156 
3157 	/* reset doorbell prior to moving from RST to RDY */
3158 	mlx5e_reset_sq_doorbell_record(sq);
3159 
3160 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST,
3161 	    MLX5_SQC_STATE_RDY);
3162 	if (err != 0) {
3163 		if_printf(sq->ifp,
3164 		    "mlx5e_modify_sq() from RST to RDY failed: %d\n", err);
3165 	}
3166 
3167 	mtx_lock(&sq->lock);
3168 	sq->cev_next_state = MLX5E_CEV_STATE_INITIAL;
3169 	sq->stopped = 0;
3170 	mtx_unlock(&sq->lock);
}
3173 
3174 static void
3175 mlx5e_enable_tx_dma(struct mlx5e_channel *ch)
3176 {
	int i;
3178 
3179 	for (i = 0; i < ch->num_tc; i++)
3180 		mlx5e_resume_sq(&ch->sq[i]);
3181 }
3182 
3183 static void
3184 mlx5e_disable_rx_dma(struct mlx5e_channel *ch)
3185 {
3186 	struct mlx5e_rq *rq = &ch->rq;
3187 	int err;
3188 
3189 	mtx_lock(&rq->mtx);
3190 	rq->enabled = 0;
3191 	callout_stop(&rq->watchdog);
3192 	mtx_unlock(&rq->mtx);
3193 
3194 	callout_drain(&rq->watchdog);
3195 
3196 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR);
3197 	if (err != 0) {
3198 		if_printf(rq->ifp,
		    "mlx5e_modify_rq() from RDY to ERR failed: %d\n", err);
3200 	}
3201 
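	/*
	 * In the ERR state the hardware completes all posted receive
	 * WQEs; poll the completion queue until the work queue list is
	 * fully drained.
	 */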
3202 	while (!mlx5_wq_ll_is_empty(&rq->wq)) {
3203 		msleep(1);
3204 		rq->cq.mcq.comp(&rq->cq.mcq);
3205 	}
3206 
	/*
	 * Transitioning into the RST state allows the FW to track fewer
	 * ERR-state queues, thus reducing the receive queue flushing
	 * time.
	 */
3211 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST);
3212 	if (err != 0) {
3213 		if_printf(rq->ifp,
3214 		    "mlx5e_modify_rq() from ERR to RST failed: %d\n", err);
3215 	}
3216 }
3217 
3218 static void
3219 mlx5e_enable_rx_dma(struct mlx5e_channel *ch)
3220 {
3221 	struct mlx5e_rq *rq = &ch->rq;
3222 	int err;
3223 
3224 	rq->wq.wqe_ctr = 0;
3225 	mlx5_wq_ll_update_db_record(&rq->wq);
3226 	err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3227 	if (err != 0) {
3228 		if_printf(rq->ifp,
3229 		    "mlx5e_modify_rq() from RST to RDY failed: %d\n", err);
	}
3231 
3232 	rq->enabled = 1;
3233 
3234 	rq->cq.mcq.comp(&rq->cq.mcq);
3235 }
3236 
3237 void
3238 mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value)
3239 {
3240 	int i;
3241 
3242 	if (priv->channel == NULL)
3243 		return;
3244 
3245 	for (i = 0; i < priv->params.num_channels; i++) {
3246 
3247 		if (!priv->channel[i])
3248 			continue;
3249 
3250 		if (value)
3251 			mlx5e_disable_tx_dma(priv->channel[i]);
3252 		else
3253 			mlx5e_enable_tx_dma(priv->channel[i]);
3254 	}
3255 }
3256 
3257 void
3258 mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value)
3259 {
3260 	int i;
3261 
3262 	if (priv->channel == NULL)
3263 		return;
3264 
3265 	for (i = 0; i < priv->params.num_channels; i++) {
3266 
3267 		if (!priv->channel[i])
3268 			continue;
3269 
3270 		if (value)
3271 			mlx5e_disable_rx_dma(priv->channel[i]);
3272 		else
3273 			mlx5e_enable_rx_dma(priv->channel[i]);
3274 	}
3275 }
3276 
3277 static void
3278 mlx5e_add_hw_stats(struct mlx5e_priv *priv)
3279 {
3280 	SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3281 	    OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0,
3282 	    sysctl_firmware, "A", "HCA firmware version");
3283 
3284 	SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw),
3285 	    OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0,
3286 	    "Board ID");
3287 }
3288 
3289 static int
3290 mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3291 {
3292 	struct mlx5e_priv *priv = arg1;
3293 	uint32_t tx_pfc;
3294 	uint32_t value;
3295 	int error;
3296 
3297 	PRIV_LOCK(priv);
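	/* "arg2" selects which of the eight PFC priorities is accessed */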
3298 
3299 	tx_pfc = priv->params.tx_priority_flow_control;
3300 
3301 	/* get current value */
3302 	value = (tx_pfc >> arg2) & 1;
3303 
3304 	error = sysctl_handle_32(oidp, &value, 0, req);
3305 
	/* update the requested bit; any non-zero value enables it */
3307 	if (value != 0)
3308 		priv->params.tx_priority_flow_control |= (1 << arg2);
3309 	else
3310 		priv->params.tx_priority_flow_control &= ~(1 << arg2);
3311 
3312 	/* check if update is required */
3313 	if (error == 0 && priv->gone == 0 &&
3314 	    tx_pfc != priv->params.tx_priority_flow_control) {
3315 		error = -mlx5e_set_port_pfc(priv);
3316 		/* restore previous value */
3317 		if (error != 0)
			priv->params.tx_priority_flow_control = tx_pfc;
3319 	}
3320 	PRIV_UNLOCK(priv);
3321 
3322 	return (error);
3323 }
3324 
3325 static int
3326 mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS)
3327 {
3328 	struct mlx5e_priv *priv = arg1;
3329 	uint32_t rx_pfc;
3330 	uint32_t value;
3331 	int error;
3332 
3333 	PRIV_LOCK(priv);
3334 
3335 	rx_pfc = priv->params.rx_priority_flow_control;
3336 
3337 	/* get current value */
3338 	value = (rx_pfc >> arg2) & 1;
3339 
3340 	error = sysctl_handle_32(oidp, &value, 0, req);
3341 
	/* update the requested bit; any non-zero value enables it */
3343 	if (value != 0)
3344 		priv->params.rx_priority_flow_control |= (1 << arg2);
3345 	else
3346 		priv->params.rx_priority_flow_control &= ~(1 << arg2);
3347 
3348 	/* check if update is required */
3349 	if (error == 0 && priv->gone == 0 &&
3350 	    rx_pfc != priv->params.rx_priority_flow_control) {
3351 		error = -mlx5e_set_port_pfc(priv);
3352 		/* restore previous value */
3353 		if (error != 0)
			priv->params.rx_priority_flow_control = rx_pfc;
3355 	}
3356 	PRIV_UNLOCK(priv);
3357 
3358 	return (error);
3359 }
3360 
3361 static void
3362 mlx5e_setup_pauseframes(struct mlx5e_priv *priv)
3363 {
3364 	unsigned int x;
3365 	char path[96];
3366 	int error;
3367 
3368 	/* Only receiving pauseframes is enabled by default */
3369 	priv->params.tx_pauseframe_control = 0;
3370 	priv->params.rx_pauseframe_control = 1;
3371 
	/* disable priority flow control (PFC) by default */
3373 	priv->params.tx_priority_flow_control = 0;
3374 	priv->params.rx_priority_flow_control = 0;
3375 
3376 #if (__FreeBSD_version < 1100000)
3377 	/* compute path for sysctl */
3378 	snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control",
3379 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3380 
3381 	/* try to fetch tunable, if any */
3382 	TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control);
3383 
3384 	/* compute path for sysctl */
3385 	snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control",
3386 	    device_get_unit(priv->mdev->pdev->dev.bsddev));
3387 
3388 	/* try to fetch tunable, if any */
3389 	TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control);
3390 
	for (x = 0; x != 8; x++) {
		int value;

		/* compute path for sysctl */
		snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u",
		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);

		/* try to fetch tunable, if any */
		if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
			priv->params.tx_priority_flow_control |= 1 << x;

		/* compute path for sysctl */
		snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u",
		    device_get_unit(priv->mdev->pdev->dev.bsddev), x);

		/* try to fetch tunable, if any */
		if (TUNABLE_INT_FETCH(path, &value) != 0 && value != 0)
			priv->params.rx_priority_flow_control |= 1 << x;
	}
3409 #endif
3410 
3411 	/* register pauseframe SYSCTLs */
3412 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3413 	    OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN,
3414 	    &priv->params.tx_pauseframe_control, 0,
3415 	    "Set to enable TX pause frames. Clear to disable.");
3416 
3417 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3418 	    OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN,
3419 	    &priv->params.rx_pauseframe_control, 0,
3420 	    "Set to enable RX pause frames. Clear to disable.");
3421 
	/* register priority flow control (PFC) SYSCTLs */
3423 	for (x = 0; x != 8; x++) {
3424 		snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x);
3425 
3426 		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3427 		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3428 		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU",
		    "Set to enable TX priority flow control frames for the given priority. Clear to disable.");
3430 
3431 		snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x);
3432 
3433 		SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3434 		    OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN |
3435 		    CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU",
		    "Set to enable RX priority flow control frames for the given priority. Clear to disable.");
3437 	}
3438 
3439 	PRIV_LOCK(priv);
3440 
3441 	/* range check */
3442 	priv->params.tx_pauseframe_control =
3443 	    priv->params.tx_pauseframe_control ? 1 : 0;
3444 	priv->params.rx_pauseframe_control =
3445 	    priv->params.rx_pauseframe_control ? 1 : 0;
3446 
3447 	/* update firmware */
3448 	error = mlx5e_set_port_pause_and_pfc(priv);
3449 	if (error == -EINVAL) {
3450 		if_printf(priv->ifp,
3451 		    "Global pauseframes must be disabled before enabling PFC.\n");
3452 		priv->params.rx_priority_flow_control = 0;
3453 		priv->params.tx_priority_flow_control = 0;
3454 
3455 		/* update firmware */
3456 		(void) mlx5e_set_port_pause_and_pfc(priv);
3457 	}
3458 	PRIV_UNLOCK(priv);
3459 }
3460 
3461 static void *
3462 mlx5e_create_ifp(struct mlx5_core_dev *mdev)
3463 {
3464 	struct ifnet *ifp;
3465 	struct mlx5e_priv *priv;
3466 	u8 dev_addr[ETHER_ADDR_LEN] __aligned(4);
3467 	struct sysctl_oid_list *child;
3468 	int ncv = mdev->priv.eq_table.num_comp_vectors;
3469 	char unit[16];
3470 	int err;
3471 	int i;
3472 	u32 eth_proto_cap;
3473 
3474 	if (mlx5e_check_required_hca_cap(mdev)) {
3475 		mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n");
3476 		return (NULL);
3477 	}
3478 	priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO);
3479 	mlx5e_priv_mtx_init(priv);
3480 
3481 	ifp = priv->ifp = if_alloc(IFT_ETHER);
3482 	if (ifp == NULL) {
3483 		mlx5_core_err(mdev, "if_alloc() failed\n");
3484 		goto err_free_priv;
3485 	}
3486 	ifp->if_softc = priv;
3487 	if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev));
3488 	ifp->if_mtu = ETHERMTU;
3489 	ifp->if_init = mlx5e_open;
3490 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
3491 	ifp->if_ioctl = mlx5e_ioctl;
3492 	ifp->if_transmit = mlx5e_xmit;
3493 	ifp->if_qflush = if_qflush;
3494 #if (__FreeBSD_version >= 1100000)
3495 	ifp->if_get_counter = mlx5e_get_counter;
3496 #endif
3497 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	/*
	 * Set driver features
	 */
3501 	ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6;
3502 	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING;
3503 	ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER;
3504 	ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU;
3505 	ifp->if_capabilities |= IFCAP_LRO;
3506 	ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO;
3507 	ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP;
3508 
3509 	/* set TSO limits so that we don't have to drop TX packets */
3510 	ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3511 	ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */;
3512 	ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE;
3513 
3514 	ifp->if_capenable = ifp->if_capabilities;
3515 	ifp->if_hwassist = 0;
3516 	if (ifp->if_capenable & IFCAP_TSO)
3517 		ifp->if_hwassist |= CSUM_TSO;
3518 	if (ifp->if_capenable & IFCAP_TXCSUM)
3519 		ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP);
3520 	if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3521 		ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
3522 
3523 	/* ifnet sysctl tree */
3524 	sysctl_ctx_init(&priv->sysctl_ctx);
3525 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev),
3526 	    OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name");
3527 	if (priv->sysctl_ifnet == NULL) {
3528 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3529 		goto err_free_sysctl;
3530 	}
3531 	snprintf(unit, sizeof(unit), "%d", ifp->if_dunit);
3532 	priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3533 	    OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit");
3534 	if (priv->sysctl_ifnet == NULL) {
3535 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3536 		goto err_free_sysctl;
3537 	}
3538 
3539 	/* HW sysctl tree */
3540 	child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev));
3541 	priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child,
3542 	    OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw");
3543 	if (priv->sysctl_hw == NULL) {
3544 		mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n");
3545 		goto err_free_sysctl;
3546 	}
3547 	mlx5e_build_ifp_priv(mdev, priv, ncv);
3548 
3549 	snprintf(unit, sizeof(unit), "mce%u_wq",
3550 	    device_get_unit(mdev->pdev->dev.bsddev));
3551 	priv->wq = alloc_workqueue(unit, 0, 1);
3552 	if (priv->wq == NULL) {
3553 		if_printf(ifp, "%s: alloc_workqueue failed\n", __func__);
3554 		goto err_free_sysctl;
3555 	}
3556 
3557 	err = mlx5_alloc_map_uar(mdev, &priv->cq_uar);
3558 	if (err) {
3559 		if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n",
3560 		    __func__, err);
3561 		goto err_free_wq;
3562 	}
3563 	err = mlx5_core_alloc_pd(mdev, &priv->pdn);
3564 	if (err) {
3565 		if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n",
3566 		    __func__, err);
3567 		goto err_unmap_free_uar;
3568 	}
3569 	err = mlx5_alloc_transport_domain(mdev, &priv->tdn);
3570 	if (err) {
3571 		if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n",
3572 		    __func__, err);
3573 		goto err_dealloc_pd;
3574 	}
3575 	err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr);
3576 	if (err) {
3577 		if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n",
3578 		    __func__, err);
3579 		goto err_dealloc_transport_domain;
3580 	}
3581 	mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr);
3582 
3583 	/* check if we should generate a random MAC address */
3584 	if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 &&
3585 	    is_zero_ether_addr(dev_addr)) {
3586 		random_ether_addr(dev_addr);
3587 		if_printf(ifp, "Assigned random MAC address\n");
3588 	}
3589 
3590 	/* set default MTU */
3591 	mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu);
3592 
3593 	/* Set desc */
3594 	device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version);
3595 
3596 	/* Set default media status */
3597 	priv->media_status_last = IFM_AVALID;
3598 	priv->media_active_last = IFM_ETHER | IFM_AUTO |
3599 	    IFM_ETH_RXPAUSE | IFM_FDX;
3600 
3601 	/* setup default pauseframes configuration */
3602 	mlx5e_setup_pauseframes(priv);
3603 
3604 	err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN);
3605 	if (err) {
3606 		eth_proto_cap = 0;
3607 		if_printf(ifp, "%s: Query port media capability failed, %d\n",
3608 		    __func__, err);
3609 	}
3610 
3611 	/* Setup supported medias */
3612 	ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK,
3613 	    mlx5e_media_change, mlx5e_media_status);
3614 
3615 	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) {
3616 		if (mlx5e_mode_table[i].baudrate == 0)
3617 			continue;
3618 		if (MLX5E_PROT_MASK(i) & eth_proto_cap) {
3619 			ifmedia_add(&priv->media,
3620 			    mlx5e_mode_table[i].subtype |
3621 			    IFM_ETHER, 0, NULL);
3622 			ifmedia_add(&priv->media,
3623 			    mlx5e_mode_table[i].subtype |
3624 			    IFM_ETHER | IFM_FDX |
3625 			    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3626 		}
3627 	}
3628 
3629 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL);
3630 	ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3631 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL);
3632 
3633 	/* Set autoselect by default */
3634 	ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX |
3635 	    IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE);
3636 	ether_ifattach(ifp, dev_addr);
3637 
3638 	/* Register for VLAN events */
3639 	priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
3640 	    mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST);
3641 	priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
3642 	    mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST);
3643 
3644 	/* Link is down by default */
3645 	if_link_state_change(ifp, LINK_STATE_DOWN);
3646 
3647 	mlx5e_enable_async_events(priv);
3648 
3649 	mlx5e_add_hw_stats(priv);
3650 
3651 	mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3652 	    "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM,
3653 	    priv->stats.vport.arg);
3654 
3655 	mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3656 	    "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM,
3657 	    priv->stats.pport.arg);
3658 
3659 	mlx5e_create_ethtool(priv);
3660 
3661 	mtx_lock(&priv->async_events_mtx);
3662 	mlx5e_update_stats(priv);
3663 	mtx_unlock(&priv->async_events_mtx);
3664 
3665 	SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet),
3666 	    OID_AUTO, "rx_clbr_done", CTLFLAG_RD,
3667 	    &priv->clbr_done, 0,
3668 	    "RX timestamps calibration state");
3669 	callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT);
3670 	mlx5e_reset_calibration_callout(priv);
3671 
3672 	return (priv);
3673 
3674 err_dealloc_transport_domain:
3675 	mlx5_dealloc_transport_domain(mdev, priv->tdn);
3676 
3677 err_dealloc_pd:
3678 	mlx5_core_dealloc_pd(mdev, priv->pdn);
3679 
3680 err_unmap_free_uar:
3681 	mlx5_unmap_free_uar(mdev, &priv->cq_uar);
3682 
3683 err_free_wq:
3684 	destroy_workqueue(priv->wq);
3685 
3686 err_free_sysctl:
3687 	sysctl_ctx_free(&priv->sysctl_ctx);
3688 
3689 	if_free(ifp);
3690 
3691 err_free_priv:
3692 	mlx5e_priv_mtx_destroy(priv);
3693 	free(priv, M_MLX5EN);
3694 	return (NULL);
3695 }
3696 
3697 static void
3698 mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv)
3699 {
3700 	struct mlx5e_priv *priv = vpriv;
3701 	struct ifnet *ifp = priv->ifp;
3702 
3703 	/* don't allow more IOCTLs */
3704 	priv->gone = 1;
3705 
3706 	/*
3707 	 * Clear the device description to avoid use after free,
3708 	 * because the bsddev is not destroyed when this module is
3709 	 * unloaded:
3710 	 */
3711 	device_set_desc(mdev->pdev->dev.bsddev, NULL);
3712 
3713 	/* XXX wait a bit to allow IOCTL handlers to complete */
3714 	pause("W", hz);
3715 
3716 	/* stop watchdog timer */
3717 	callout_drain(&priv->watchdog);
3718 
3719 	callout_drain(&priv->tstmp_clbr);
3720 
3721 	if (priv->vlan_attach != NULL)
3722 		EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach);
3723 	if (priv->vlan_detach != NULL)
3724 		EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach);
3725 
3726 	/* make sure device gets closed */
3727 	PRIV_LOCK(priv);
3728 	mlx5e_close_locked(ifp);
3729 	PRIV_UNLOCK(priv);
3730 
3731 	/* unregister device */
3732 	ifmedia_removeall(&priv->media);
3733 	ether_ifdetach(ifp);
3734 	if_free(ifp);
3735 
3736 	/* destroy all remaining sysctl nodes */
3737 	if (priv->sysctl_debug)
3738 		sysctl_ctx_free(&priv->stats.port_stats_debug.ctx);
3739 	sysctl_ctx_free(&priv->stats.vport.ctx);
3740 	sysctl_ctx_free(&priv->stats.pport.ctx);
3741 	sysctl_ctx_free(&priv->sysctl_ctx);
3742 
3743 	mlx5_core_destroy_mkey(priv->mdev, &priv->mr);
3744 	mlx5_dealloc_transport_domain(priv->mdev, priv->tdn);
3745 	mlx5_core_dealloc_pd(priv->mdev, priv->pdn);
3746 	mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar);
3747 	mlx5e_disable_async_events(priv);
3748 	destroy_workqueue(priv->wq);
3749 	mlx5e_priv_mtx_destroy(priv);
3750 	free(priv, M_MLX5EN);
3751 }
3752 
3753 static void *
3754 mlx5e_get_ifp(void *vpriv)
3755 {
3756 	struct mlx5e_priv *priv = vpriv;
3757 
3758 	return (priv->ifp);
3759 }
3760 
3761 static struct mlx5_interface mlx5e_interface = {
3762 	.add = mlx5e_create_ifp,
3763 	.remove = mlx5e_destroy_ifp,
3764 	.event = mlx5e_async_event,
3765 	.protocol = MLX5_INTERFACE_PROTOCOL_ETH,
3766 	.get_dev = mlx5e_get_ifp,
3767 };
3768 
3769 void
3770 mlx5e_init(void)
3771 {
3772 	mlx5_register_interface(&mlx5e_interface);
3773 }
3774 
3775 void
3776 mlx5e_cleanup(void)
3777 {
3778 	mlx5_unregister_interface(&mlx5e_interface);
3779 }
3780 
3781 module_init_order(mlx5e_init, SI_ORDER_THIRD);
3782 module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD);
3783 
3784 #if (__FreeBSD_version >= 1100000)
3785 MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1);
3786 #endif
3787 MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1);
3788 MODULE_VERSION(mlx5en, 1);
3789