1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/mlx5.h"
39 #include "lib/devcom.h"
40 #include "mlx5_core.h"
41 #include "eswitch.h"
42 #include "esw/acl/ofld.h"
43 #include "lag.h"
44 #include "mp.h"
45 #include "mpesw.h"
46 
47 
48 /* General purpose, use for short periods of time.
49  * Beware of lock dependencies (preferably, no locks should be acquired
50  * under it).
51  */
52 static DEFINE_SPINLOCK(lag_lock);
53 
54 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
55 {
56 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
57 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
58 
59 	if (mode == MLX5_LAG_MODE_MPESW)
60 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
61 
62 	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
63 }
64 
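/* Build a bitmask with one bit set per port index that is currently able to
 * transmit (tx enabled and link up). Used to program the active_port field
 * of the LAG context when the hash-based port selection flow table is used.
 */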
65 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
66 {
67 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
68 	u8 active_port = 0;
69 	int num_enabled;
70 	int idx;
71 
72 	mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
73 			      &num_enabled);
74 	for (idx = 0; idx < num_enabled; idx++)
75 		active_port |= BIT_MASK(enabled_ports[idx]);
76 
77 	return active_port;
78 }
79 
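/* Build and execute the CREATE_LAG command. In queue-affinity mode the
 * context carries the tx_remap_affinity of the first two devices; in hash
 * (port selection flow table) mode the active-port bitmask is programmed
 * instead, when the flow-table-bypass capability is present. The FDB
 * selection mode is taken from the mode flags (native for shared FDB/MPESW).
 */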
80 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
81 			       int mode, unsigned long flags)
82 {
83 	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
84 				     &flags);
85 	int port_sel_mode = get_port_sel_mode(mode, flags);
86 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
87 	u8 *ports = ldev->v2p_map;
88 	int idx0, idx1;
89 	void *lag_ctx;
90 
91 	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
92 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
93 	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
94 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
95 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
96 
97 	if (idx0 < 0 || idx1 < 0)
98 		return -EINVAL;
99 
100 	switch (port_sel_mode) {
101 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
102 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
103 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
104 		break;
105 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
106 		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
107 			break;
108 
109 		MLX5_SET(lagc, lag_ctx, active_port,
110 			 lag_active_port_bits(mlx5_lag_dev(dev)));
111 		break;
112 	default:
113 		break;
114 	}
115 	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
116 
117 	return mlx5_cmd_exec_in(dev, create_lag, in);
118 }
119 
120 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
121 			       u8 *ports)
122 {
123 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
124 	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
125 	int idx0, idx1;
126 
127 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
128 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
129 	if (idx0 < 0 || idx1 < 0)
130 		return -EINVAL;
131 
132 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
133 	MLX5_SET(modify_lag_in, in, field_select, 0x1);
134 
135 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
136 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
137 
138 	return mlx5_cmd_exec_in(dev, modify_lag, in);
139 }
140 
141 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
142 {
143 	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
144 
145 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
146 
147 	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
148 }
149 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
150 
151 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
152 {
153 	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
154 
155 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
156 
157 	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
158 }
159 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
160 
161 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
162 				   u8 *ports, int *num_disabled)
163 {
164 	int i;
165 
166 	*num_disabled = 0;
167 	mlx5_ldev_for_each(i, 0, ldev)
168 		if (!tracker->netdev_state[i].tx_enabled ||
169 		    !tracker->netdev_state[i].link_up)
170 			ports[(*num_disabled)++] = i;
171 }
172 
173 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
174 			   u8 *ports, int *num_enabled)
175 {
176 	int i;
177 
178 	*num_enabled = 0;
179 	mlx5_ldev_for_each(i, 0, ldev)
180 		if (tracker->netdev_state[i].tx_enabled &&
181 		    tracker->netdev_state[i].link_up)
182 			ports[(*num_enabled)++] = i;
183 
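	/* No port is both up and tx enabled; fall back to the full list of
	 * disabled ports so callers still get a valid set to map to.
	 */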
184 	if (*num_enabled == 0)
185 		mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
186 }
187 
188 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
189 				   struct mlx5_lag *ldev,
190 				   struct lag_tracker *tracker,
191 				   unsigned long flags)
192 {
193 	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
194 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
195 	int written = 0;
196 	int num_enabled;
197 	int idx;
198 	int err;
199 	int i;
200 	int j;
201 
202 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
203 		mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
204 				      &num_enabled);
205 		for (i = 0; i < num_enabled; i++) {
206 			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
207 			if (err != 3)
208 				return;
209 			written += err;
210 		}
211 		buf[written - 2] = 0;
212 		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
213 	} else {
214 		mlx5_ldev_for_each(i, 0, ldev) {
215 			for (j = 0; j < ldev->buckets; j++) {
216 				idx = i * ldev->buckets + j;
217 				err = scnprintf(buf + written, 10,
218 						" port %d:%d", i + 1, ldev->v2p_map[idx]);
219 				if (err != 9)
220 					return;
221 				written += err;
222 			}
223 		}
224 		mlx5_core_info(dev, "lag map:%s\n", buf);
225 	}
226 }
227 
228 static int mlx5_lag_netdev_event(struct notifier_block *this,
229 				 unsigned long event, void *ptr);
230 static void mlx5_do_bond_work(struct work_struct *work);
231 
232 static void mlx5_ldev_free(struct kref *ref)
233 {
234 	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
235 	struct net *net;
236 
237 	if (ldev->nb.notifier_call) {
238 		net = read_pnet(&ldev->net);
239 		unregister_netdevice_notifier_net(net, &ldev->nb);
240 	}
241 
242 	mlx5_lag_mp_cleanup(ldev);
243 	cancel_delayed_work_sync(&ldev->bond_work);
244 	destroy_workqueue(ldev->wq);
245 	mutex_destroy(&ldev->lock);
246 	kfree(ldev);
247 }
248 
249 static void mlx5_ldev_put(struct mlx5_lag *ldev)
250 {
251 	kref_put(&ldev->ref, mlx5_ldev_free);
252 }
253 
254 static void mlx5_ldev_get(struct mlx5_lag *ldev)
255 {
256 	kref_get(&ldev->ref);
257 }
258 
259 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
260 {
261 	struct mlx5_lag *ldev;
262 	int err;
263 
264 	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
265 	if (!ldev)
266 		return NULL;
267 
268 	ldev->wq = create_singlethread_workqueue("mlx5_lag");
269 	if (!ldev->wq) {
270 		kfree(ldev);
271 		return NULL;
272 	}
273 
274 	kref_init(&ldev->ref);
275 	mutex_init(&ldev->lock);
276 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
277 
278 	ldev->nb.notifier_call = mlx5_lag_netdev_event;
279 	write_pnet(&ldev->net, mlx5_core_net(dev));
280 	if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
281 		ldev->nb.notifier_call = NULL;
282 		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
283 	}
284 	ldev->mode = MLX5_LAG_MODE_NONE;
285 
286 	err = mlx5_lag_mp_init(ldev);
287 	if (err)
288 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
289 			      err);
290 
291 	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
292 	ldev->buckets = 1;
293 
294 	return ldev;
295 }
296 
297 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
298 				struct net_device *ndev)
299 {
300 	int i;
301 
302 	mlx5_ldev_for_each(i, 0, ldev)
303 		if (ldev->pf[i].netdev == ndev)
304 			return i;
305 
306 	return -ENOENT;
307 }
308 
309 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
310 {
311 	int i, num = 0;
312 
313 	if (!ldev)
314 		return -ENOENT;
315 
316 	mlx5_ldev_for_each(i, 0, ldev) {
317 		if (num == seq)
318 			return i;
319 		num++;
320 	}
321 	return -ENOENT;
322 }
323 
324 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
325 {
326 	int i, num = 0;
327 
328 	if (!ldev)
329 		return 0;
330 
331 	mlx5_ldev_for_each(i, 0, ldev) {
332 		(void)i;
333 		num++;
334 	}
335 	return num;
336 }
337 
338 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
339 {
340 	int i, num = 0;
341 
342 	if (!ldev)
343 		return 0;
344 
345 	mlx5_ldev_for_each(i, 0, ldev)
346 		if (ldev->pf[i].netdev)
347 			num++;
348 	return num;
349 }
350 
351 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
352 {
353 	return ldev->mode == MLX5_LAG_MODE_ROCE;
354 }
355 
356 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
357 {
358 	return ldev->mode == MLX5_LAG_MODE_SRIOV;
359 }
360 
361 /* Create a mapping between steering slots and active ports.
362  * As we have ldev->buckets slots per port, first assume the native
363  * mapping should be used.
364  * If some ports are disabled, fill their slots with a mapping that
365  * points to active ports.
366  */
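/* For example, with two ports and one bucket each the native map is
 * [1, 2]; if port 2 is down or tx-disabled, its slot is remapped to the
 * remaining active port, giving [1, 1].
 */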
367 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
368 					   struct mlx5_lag *ldev,
369 					   u8 buckets,
370 					   u8 *ports)
371 {
372 	int disabled[MLX5_MAX_PORTS] = {};
373 	int enabled[MLX5_MAX_PORTS] = {};
374 	int disabled_ports_num = 0;
375 	int enabled_ports_num = 0;
376 	int idx;
377 	u32 rand;
378 	int i;
379 	int j;
380 
381 	mlx5_ldev_for_each(i, 0, ldev) {
382 		if (tracker->netdev_state[i].tx_enabled &&
383 		    tracker->netdev_state[i].link_up)
384 			enabled[enabled_ports_num++] = i;
385 		else
386 			disabled[disabled_ports_num++] = i;
387 	}
388 
389 	/* Use native mapping by default where each port's buckets
390 	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc.
391 	 */
392 	mlx5_ldev_for_each(i, 0, ldev) {
393 		for (j = 0; j < buckets; j++) {
394 			idx = i * buckets + j;
395 			ports[idx] = i + 1;
396 		}
397 	}
398 
399 	/* If all ports are disabled/enabled keep native mapping */
400 	if (enabled_ports_num == ldev->ports ||
401 	    disabled_ports_num == ldev->ports)
402 		return;
403 
404 	/* Go over the disabled ports and for each assign a random active port */
405 	for (i = 0; i < disabled_ports_num; i++) {
406 		for (j = 0; j < buckets; j++) {
407 			get_random_bytes(&rand, 4);
408 			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
409 		}
410 	}
411 }
412 
413 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
414 {
415 	int i;
416 
417 	mlx5_ldev_for_each(i, 0, ldev)
418 		if (ldev->pf[i].has_drop)
419 			return true;
420 	return false;
421 }
422 
423 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
424 {
425 	int i;
426 
427 	mlx5_ldev_for_each(i, 0, ldev) {
428 		if (!ldev->pf[i].has_drop)
429 			continue;
430 
431 		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
432 							     MLX5_VPORT_UPLINK);
433 		ldev->pf[i].has_drop = false;
434 	}
435 }
436 
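/* Refresh the ingress ACL drop rules: remove all existing rules first, then,
 * if the bond reports inactive slaves, install a drop rule on the uplink
 * vport of every port that is currently unable to transmit.
 */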
437 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
438 				     struct lag_tracker *tracker)
439 {
440 	u8 disabled_ports[MLX5_MAX_PORTS] = {};
441 	struct mlx5_core_dev *dev;
442 	int disabled_index;
443 	int num_disabled;
444 	int err;
445 	int i;
446 
447 	/* First delete the current drop rule so there won't be any dropped
448 	 * packets
449 	 */
450 	mlx5_lag_drop_rule_cleanup(ldev);
451 
452 	if (!ldev->tracker.has_inactive)
453 		return;
454 
455 	mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
456 
457 	for (i = 0; i < num_disabled; i++) {
458 		disabled_index = disabled_ports[i];
459 		dev = ldev->pf[disabled_index].dev;
460 		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
461 								  MLX5_VPORT_UPLINK);
462 		if (!err)
463 			ldev->pf[disabled_index].has_drop = true;
464 		else
465 			mlx5_core_err(dev,
466 				      "Failed to create lag drop rule, error: %d", err);
467 	}
468 }
469 
470 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
471 {
472 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
473 	void *lag_ctx;
474 
475 	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
476 
477 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
478 	MLX5_SET(modify_lag_in, in, field_select, 0x2);
479 
480 	MLX5_SET(lagc, lag_ctx, active_port, ports);
481 
482 	return mlx5_cmd_exec_in(dev, modify_lag, in);
483 }
484 
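/* Reprogram the port mapping: in hash mode update the port selection flow
 * table (and, when the bypass capability is present, the active-port
 * bitmask); otherwise issue MODIFY_LAG with the new queue-affinity map.
 */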
485 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
486 {
487 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
488 	struct mlx5_core_dev *dev0;
489 	u8 active_ports;
490 	int ret;
491 
492 	if (idx < 0)
493 		return -EINVAL;
494 
495 	dev0 = ldev->pf[idx].dev;
496 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
497 		ret = mlx5_lag_port_sel_modify(ldev, ports);
498 		if (ret ||
499 		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
500 			return ret;
501 
502 		active_ports = lag_active_port_bits(ldev);
503 
504 		return mlx5_cmd_modify_active_port(dev0, active_ports);
505 	}
506 	return mlx5_cmd_modify_lag(dev0, ldev, ports);
507 }
508 
509 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
510 {
511 	struct net_device *ndev = NULL;
512 	struct mlx5_lag *ldev;
513 	unsigned long flags;
514 	int i, last_idx;
515 
516 	spin_lock_irqsave(&lag_lock, flags);
517 	ldev = mlx5_lag_dev(dev);
518 
519 	if (!ldev)
520 		goto unlock;
521 
522 	mlx5_ldev_for_each(i, 0, ldev)
523 		if (ldev->tracker.netdev_state[i].tx_enabled)
524 			ndev = ldev->pf[i].netdev;
525 	if (!ndev) {
526 		last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
527 		if (last_idx < 0)
528 			goto unlock;
529 		ndev = ldev->pf[last_idx].netdev;
530 	}
531 
532 	dev_hold(ndev);
533 
534 unlock:
535 	spin_unlock_irqrestore(&lag_lock, flags);
536 
537 	return ndev;
538 }
539 
540 void mlx5_modify_lag(struct mlx5_lag *ldev,
541 		     struct lag_tracker *tracker)
542 {
543 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
544 	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
545 	struct mlx5_core_dev *dev0;
546 	int idx;
547 	int err;
548 	int i;
549 	int j;
550 
551 	if (first_idx < 0)
552 		return;
553 
554 	dev0 = ldev->pf[first_idx].dev;
555 	mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
556 
557 	mlx5_ldev_for_each(i, 0, ldev) {
558 		for (j = 0; j < ldev->buckets; j++) {
559 			idx = i * ldev->buckets + j;
560 			if (ports[idx] == ldev->v2p_map[idx])
561 				continue;
562 			err = _mlx5_modify_lag(ldev, ports);
563 			if (err) {
564 				mlx5_core_err(dev0,
565 					      "Failed to modify LAG (%d)\n",
566 					      err);
567 				return;
568 			}
569 			memcpy(ldev->v2p_map, ports, sizeof(ports));
570 
571 			mlx5_lag_print_mapping(dev0, ldev, tracker,
572 					       ldev->mode_flags);
573 			break;
574 		}
575 	}
576 
577 	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
578 		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
579 
580 		if (ldev->mode != MLX5_LAG_MODE_ROCE)
581 			mlx5_lag_drop_rule_setup(ldev, tracker);
582 		/* Only SR-IOV and RoCE LAG should have tracker->tx_type set,
583 		 * so there is no need to check the mode.
584 		 */
585 		blocking_notifier_call_chain(&dev0->priv.lag_nh,
586 					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
587 					     ndev);
588 		dev_put(ndev);
589 	}
590 }
591 
592 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
593 				      enum mlx5_lag_mode mode,
594 				      unsigned long *flags)
595 {
596 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
597 	struct mlx5_core_dev *dev0;
598 
599 	if (first_idx < 0)
600 		return -EINVAL;
601 
602 	if (mode == MLX5_LAG_MODE_MPESW ||
603 	    mode == MLX5_LAG_MODE_MULTIPATH)
604 		return 0;
605 
606 	dev0 = ldev->pf[first_idx].dev;
607 
608 	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
609 		if (ldev->ports > 2)
610 			return -EINVAL;
611 		return 0;
612 	}
613 
614 	if (ldev->ports > 2)
615 		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
616 
617 	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
618 
619 	return 0;
620 }
621 
622 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
623 			      struct lag_tracker *tracker, bool shared_fdb,
624 			      unsigned long *flags)
625 {
626 	*flags = 0;
627 	if (shared_fdb) {
628 		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
629 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
630 	}
631 
632 	if (mode == MLX5_LAG_MODE_MPESW)
633 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
634 
635 	return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
636 }
637 
638 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
639 {
640 	int port_sel_mode = get_port_sel_mode(mode, flags);
641 
642 	switch (port_sel_mode) {
643 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
644 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
645 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
646 	default: return "invalid";
647 	}
648 }
649 
650 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
651 {
652 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
653 	struct mlx5_eswitch *master_esw;
654 	struct mlx5_core_dev *dev0;
655 	int i, j;
656 	int err;
657 
658 	if (first_idx < 0)
659 		return -EINVAL;
660 
661 	dev0 = ldev->pf[first_idx].dev;
662 	master_esw = dev0->priv.eswitch;
663 	mlx5_ldev_for_each(i, first_idx + 1, ldev) {
664 		struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
665 
666 		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
667 							       slave_esw, ldev->ports);
668 		if (err)
669 			goto err;
670 	}
671 	return 0;
672 err:
673 	mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
674 		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
675 							 ldev->pf[j].dev->priv.eswitch);
676 	return err;
677 }
678 
679 static int mlx5_create_lag(struct mlx5_lag *ldev,
680 			   struct lag_tracker *tracker,
681 			   enum mlx5_lag_mode mode,
682 			   unsigned long flags)
683 {
684 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
685 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
686 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
687 	struct mlx5_core_dev *dev0;
688 	int err;
689 
690 	if (first_idx < 0)
691 		return -EINVAL;
692 
693 	dev0 = ldev->pf[first_idx].dev;
694 	if (tracker)
695 		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
696 	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
697 		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
698 
699 	err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
700 	if (err) {
701 		mlx5_core_err(dev0,
702 			      "Failed to create LAG (%d)\n",
703 			      err);
704 		return err;
705 	}
706 
707 	if (shared_fdb) {
708 		err = mlx5_lag_create_single_fdb(ldev);
709 		if (err)
710 			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
711 		else
712 			mlx5_core_info(dev0, "Operation mode is single FDB\n");
713 	}
714 
715 	if (err) {
716 		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
717 		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
718 			mlx5_core_err(dev0,
719 				      "Failed to deactivate RoCE LAG; driver restart required\n");
720 	}
721 	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
722 
723 	return err;
724 }
725 
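/* Bring the LAG up in the requested mode: derive the mode flags, build the
 * Tx affinity map and, for hash mode, the port selection flow table, then
 * issue CREATE_LAG. On failure the port selection objects are destroyed
 * again and an error is returned.
 */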
726 int mlx5_activate_lag(struct mlx5_lag *ldev,
727 		      struct lag_tracker *tracker,
728 		      enum mlx5_lag_mode mode,
729 		      bool shared_fdb)
730 {
731 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
732 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
733 	struct mlx5_core_dev *dev0;
734 	unsigned long flags = 0;
735 	int err;
736 
737 	if (first_idx < 0)
738 		return -EINVAL;
739 
740 	dev0 = ldev->pf[first_idx].dev;
741 	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
742 	if (err)
743 		return err;
744 
745 	if (mode != MLX5_LAG_MODE_MPESW) {
746 		mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
747 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
748 			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
749 						       ldev->v2p_map);
750 			if (err) {
751 				mlx5_core_err(dev0,
752 					      "Failed to create LAG port selection(%d)\n",
753 					      err);
754 				return err;
755 			}
756 		}
757 	}
758 
759 	err = mlx5_create_lag(ldev, tracker, mode, flags);
760 	if (err) {
761 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
762 			mlx5_lag_port_sel_destroy(ldev);
763 		if (roce_lag)
764 			mlx5_core_err(dev0,
765 				      "Failed to activate RoCE LAG\n");
766 		else
767 			mlx5_core_err(dev0,
768 				      "Failed to activate VF LAG\n"
769 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
770 		return err;
771 	}
772 
773 	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
774 	    !roce_lag)
775 		mlx5_lag_drop_rule_setup(ldev, tracker);
776 
777 	ldev->mode = mode;
778 	ldev->mode_flags = flags;
779 	return 0;
780 }
781 
782 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
783 {
784 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
785 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
786 	bool roce_lag = __mlx5_lag_is_roce(ldev);
787 	unsigned long flags = ldev->mode_flags;
788 	struct mlx5_eswitch *master_esw;
789 	struct mlx5_core_dev *dev0;
790 	int err;
791 	int i;
792 
793 	if (first_idx < 0)
794 		return -EINVAL;
795 
796 	dev0 = ldev->pf[first_idx].dev;
797 	master_esw = dev0->priv.eswitch;
798 	ldev->mode = MLX5_LAG_MODE_NONE;
799 	ldev->mode_flags = 0;
800 	mlx5_lag_mp_reset(ldev);
801 
802 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
803 		mlx5_ldev_for_each(i, first_idx + 1, ldev)
804 			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
805 								 ldev->pf[i].dev->priv.eswitch);
806 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
807 	}
808 
809 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
810 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
811 	if (err) {
812 		if (roce_lag) {
813 			mlx5_core_err(dev0,
814 				      "Failed to deactivate RoCE LAG; driver restart required\n");
815 		} else {
816 			mlx5_core_err(dev0,
817 				      "Failed to deactivate VF LAG; driver restart required\n"
818 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
819 		}
820 		return err;
821 	}
822 
823 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
824 		mlx5_lag_port_sel_destroy(ldev);
825 		ldev->buckets = 1;
826 	}
827 	if (mlx5_lag_has_drop_rule(ldev))
828 		mlx5_lag_drop_rule_cleanup(ldev);
829 
830 	return 0;
831 }
832 
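/* A LAG can only be offloaded if every slot of the lag device is populated
 * and the RoCE enablement state is consistent across devices. With eswitch
 * support all devices must be in the same eswitch mode and any device with
 * VFs must be in switchdev mode; without it, SR-IOV must not be enabled.
 */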
833 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
834 {
835 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
836 #ifdef CONFIG_MLX5_ESWITCH
837 	struct mlx5_core_dev *dev;
838 	u8 mode;
839 #endif
840 	bool roce_support;
841 	int i;
842 
843 	if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
844 		return false;
845 
846 #ifdef CONFIG_MLX5_ESWITCH
847 	mlx5_ldev_for_each(i, 0, ldev) {
848 		dev = ldev->pf[i].dev;
849 		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
850 			return false;
851 	}
852 
853 	dev = ldev->pf[first_idx].dev;
854 	mode = mlx5_eswitch_mode(dev);
855 	mlx5_ldev_for_each(i, 0, ldev)
856 		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
857 			return false;
858 
859 #else
860 	mlx5_ldev_for_each(i, 0, ldev)
861 		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
862 			return false;
863 #endif
864 	roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
865 	mlx5_ldev_for_each(i, first_idx + 1, ldev)
866 		if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
867 			return false;
868 
869 	return true;
870 }
871 
872 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
873 {
874 	int i;
875 
876 	mlx5_ldev_for_each(i, 0, ldev) {
877 		if (ldev->pf[i].dev->priv.flags &
878 		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
879 			continue;
880 
881 		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
882 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
883 	}
884 }
885 
886 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
887 {
888 	int i;
889 
890 	mlx5_ldev_for_each(i, 0, ldev) {
891 		if (ldev->pf[i].dev->priv.flags &
892 		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
893 			continue;
894 
895 		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
896 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
897 	}
898 }
899 
900 void mlx5_disable_lag(struct mlx5_lag *ldev)
901 {
902 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
903 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
904 	struct mlx5_core_dev *dev0;
905 	bool roce_lag;
906 	int err;
907 	int i;
908 
909 	if (idx < 0)
910 		return;
911 
912 	dev0 = ldev->pf[idx].dev;
913 	roce_lag = __mlx5_lag_is_roce(ldev);
914 
915 	if (shared_fdb) {
916 		mlx5_lag_remove_devices(ldev);
917 	} else if (roce_lag) {
918 		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
919 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
920 			mlx5_rescan_drivers_locked(dev0);
921 		}
922 		mlx5_ldev_for_each(i, idx + 1, ldev)
923 			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
924 	}
925 
926 	err = mlx5_deactivate_lag(ldev);
927 	if (err)
928 		return;
929 
930 	if (shared_fdb || roce_lag)
931 		mlx5_lag_add_devices(ldev);
932 
933 	if (shared_fdb)
934 		mlx5_ldev_for_each(i, 0, ldev)
935 			if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
936 				mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
937 }
938 
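/* Shared FDB requires every device to be in switchdev mode with vport match
 * metadata enabled, the relevant FW selection/steering capabilities, and all
 * expected peers already paired over devcom.
 */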
939 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
940 {
941 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
942 	struct mlx5_core_dev *dev;
943 	int i;
944 
945 	if (idx < 0)
946 		return false;
947 
948 	mlx5_ldev_for_each(i, idx + 1, ldev) {
949 		dev = ldev->pf[i].dev;
950 		if (is_mdev_switchdev_mode(dev) &&
951 		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
952 		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
953 		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
954 		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
955 		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
956 			continue;
957 		return false;
958 	}
959 
960 	dev = ldev->pf[idx].dev;
961 	if (is_mdev_switchdev_mode(dev) &&
962 	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
963 	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
964 	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
965 	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
966 		return true;
967 
968 	return false;
969 }
970 
971 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
972 {
973 	bool roce_lag = true;
974 	int i;
975 
976 	mlx5_ldev_for_each(i, 0, ldev)
977 		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
978 
979 #ifdef CONFIG_MLX5_ESWITCH
980 	mlx5_ldev_for_each(i, 0, ldev)
981 		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
982 #endif
983 
984 	return roce_lag;
985 }
986 
987 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
988 {
989 	return do_bond && __mlx5_lag_is_active(ldev) &&
990 	       ldev->mode != MLX5_LAG_MODE_MPESW;
991 }
992 
993 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
994 {
995 	return !do_bond && __mlx5_lag_is_active(ldev) &&
996 	       ldev->mode != MLX5_LAG_MODE_MPESW;
997 }
998 
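/* Re-evaluate the bond state from the netdev tracker: activate a RoCE or
 * SR-IOV LAG (optionally with a shared FDB) when the prerequisites are met,
 * update the Tx affinity mapping while the LAG is already active, or tear
 * the LAG down when the bond no longer qualifies.
 */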
999 static void mlx5_do_bond(struct mlx5_lag *ldev)
1000 {
1001 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1002 	struct lag_tracker tracker = { };
1003 	struct mlx5_core_dev *dev0;
1004 	struct net_device *ndev;
1005 	bool do_bond, roce_lag;
1006 	int err;
1007 	int i;
1008 
1009 	if (idx < 0)
1010 		return;
1011 
1012 	dev0 = ldev->pf[idx].dev;
1013 	if (!mlx5_lag_is_ready(ldev)) {
1014 		do_bond = false;
1015 	} else {
1016 		/* VF LAG is in multipath mode, ignore bond change requests */
1017 		if (mlx5_lag_is_multipath(dev0))
1018 			return;
1019 
1020 		tracker = ldev->tracker;
1021 
1022 		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1023 	}
1024 
1025 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
1026 		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1027 
1028 		roce_lag = mlx5_lag_is_roce_lag(ldev);
1029 
1030 		if (shared_fdb || roce_lag)
1031 			mlx5_lag_remove_devices(ldev);
1032 
1033 		err = mlx5_activate_lag(ldev, &tracker,
1034 					roce_lag ? MLX5_LAG_MODE_ROCE :
1035 						   MLX5_LAG_MODE_SRIOV,
1036 					shared_fdb);
1037 		if (err) {
1038 			if (shared_fdb || roce_lag)
1039 				mlx5_lag_add_devices(ldev);
1040 			if (shared_fdb) {
1041 				mlx5_ldev_for_each(i, 0, ldev)
1042 					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1043 			}
1044 
1045 			return;
1046 		} else if (roce_lag) {
1047 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1048 			mlx5_rescan_drivers_locked(dev0);
1049 			mlx5_ldev_for_each(i, idx + 1, ldev) {
1050 				if (mlx5_get_roce_state(ldev->pf[i].dev))
1051 					mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
1052 			}
1053 		} else if (shared_fdb) {
1054 			int i;
1055 
1056 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1057 			mlx5_rescan_drivers_locked(dev0);
1058 
1059 			mlx5_ldev_for_each(i, 0, ldev) {
1060 				err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1061 				if (err)
1062 					break;
1063 			}
1064 
1065 			if (err) {
1066 				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1067 				mlx5_rescan_drivers_locked(dev0);
1068 				mlx5_deactivate_lag(ldev);
1069 				mlx5_lag_add_devices(ldev);
1070 				mlx5_ldev_for_each(i, 0, ldev)
1071 					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1072 				mlx5_core_err(dev0, "Failed to enable lag\n");
1073 				return;
1074 			}
1075 		}
1076 		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1077 			ndev = mlx5_lag_active_backup_get_netdev(dev0);
1078 			/* Only SR-IOV and RoCE LAG should have tracker->tx_type
1079 			 * set, so there is no need to check the mode.
1080 			 */
1081 			blocking_notifier_call_chain(&dev0->priv.lag_nh,
1082 						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1083 						     ndev);
1084 			dev_put(ndev);
1085 		}
1086 	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1087 		mlx5_modify_lag(ldev, &tracker);
1088 	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1089 		mlx5_disable_lag(ldev);
1090 	}
1091 }
1092 
1093 /* The last mdev to unregister will destroy the workqueue before removing the
1094  * devcom component, and as all the mdevs use the same devcom component we are
1095  * guaranteed that the devcom is valid while the calling work is running.
1096  */
1097 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1098 {
1099 	struct mlx5_devcom_comp_dev *devcom = NULL;
1100 	int i;
1101 
1102 	mutex_lock(&ldev->lock);
1103 	i = mlx5_get_next_ldev_func(ldev, 0);
1104 	if (i < MLX5_MAX_PORTS)
1105 		devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
1106 	mutex_unlock(&ldev->lock);
1107 	return devcom;
1108 }
1109 
1110 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1111 {
1112 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1113 }
1114 
1115 static void mlx5_do_bond_work(struct work_struct *work)
1116 {
1117 	struct delayed_work *delayed_work = to_delayed_work(work);
1118 	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1119 					     bond_work);
1120 	struct mlx5_devcom_comp_dev *devcom;
1121 	int status;
1122 
1123 	devcom = mlx5_lag_get_devcom_comp(ldev);
1124 	if (!devcom)
1125 		return;
1126 
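	/* The devcom component lock serializes LAG reconfiguration across all
	 * devices of the same HCA; if it cannot be taken right now, retry in
	 * a second instead of blocking the workqueue.
	 */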
1127 	status = mlx5_devcom_comp_trylock(devcom);
1128 	if (!status) {
1129 		mlx5_queue_bond_work(ldev, HZ);
1130 		return;
1131 	}
1132 
1133 	mutex_lock(&ldev->lock);
1134 	if (ldev->mode_changes_in_progress) {
1135 		mutex_unlock(&ldev->lock);
1136 		mlx5_devcom_comp_unlock(devcom);
1137 		mlx5_queue_bond_work(ldev, HZ);
1138 		return;
1139 	}
1140 
1141 	mlx5_do_bond(ldev);
1142 	mutex_unlock(&ldev->lock);
1143 	mlx5_devcom_comp_unlock(devcom);
1144 }
1145 
1146 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1147 					 struct lag_tracker *tracker,
1148 					 struct netdev_notifier_changeupper_info *info)
1149 {
1150 	struct net_device *upper = info->upper_dev, *ndev_tmp;
1151 	struct netdev_lag_upper_info *lag_upper_info = NULL;
1152 	bool is_bonded, is_in_lag, mode_supported;
1153 	bool has_inactive = 0;
1154 	struct slave *slave;
1155 	u8 bond_status = 0;
1156 	int num_slaves = 0;
1157 	int changed = 0;
1158 	int i, idx = -1;
1159 
1160 	if (!netif_is_lag_master(upper))
1161 		return 0;
1162 
1163 	if (info->linking)
1164 		lag_upper_info = info->upper_info;
1165 
1166 	/* The event may still be of interest if the slave does not belong to
1167 	 * us, but is enslaved to a master which has one or more of our netdevs
1168 	 * as slaves (e.g., if a new slave is added to a master that bonds two
1169 	 * of our netdevs, we should unbond).
1170 	 */
1171 	rcu_read_lock();
1172 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1173 		mlx5_ldev_for_each(i, 0, ldev) {
1174 			if (ldev->pf[i].netdev == ndev_tmp) {
1175 				idx++;
1176 				break;
1177 			}
1178 		}
1179 		if (i < MLX5_MAX_PORTS) {
1180 			slave = bond_slave_get_rcu(ndev_tmp);
1181 			if (slave)
1182 				has_inactive |= bond_is_slave_inactive(slave);
1183 			bond_status |= (1 << idx);
1184 		}
1185 
1186 		num_slaves++;
1187 	}
1188 	rcu_read_unlock();
1189 
1190 	/* None of this lagdev's netdevs are slaves of this master. */
1191 	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1192 		return 0;
1193 
1194 	if (lag_upper_info) {
1195 		tracker->tx_type = lag_upper_info->tx_type;
1196 		tracker->hash_type = lag_upper_info->hash_type;
1197 	}
1198 
1199 	tracker->has_inactive = has_inactive;
1200 	/* Determine bonding status:
1201 	 * A device is considered bonded if all of its physical ports are slaves
1202 	 * of the same LAG master, and that master has no other slaves.
1203 	 */
1204 	is_in_lag = num_slaves == ldev->ports &&
1205 		bond_status == GENMASK(ldev->ports - 1, 0);
1206 
1207 	/* Lag mode must be activebackup or hash. */
1208 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1209 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1210 
1211 	is_bonded = is_in_lag && mode_supported;
1212 	if (tracker->is_bonded != is_bonded) {
1213 		tracker->is_bonded = is_bonded;
1214 		changed = 1;
1215 	}
1216 
1217 	if (!is_in_lag)
1218 		return changed;
1219 
1220 	if (!mlx5_lag_is_ready(ldev))
1221 		NL_SET_ERR_MSG_MOD(info->info.extack,
1222 				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
1223 	else if (!mode_supported)
1224 		NL_SET_ERR_MSG_MOD(info->info.extack,
1225 				   "Can't activate LAG offload, TX type isn't supported");
1226 
1227 	return changed;
1228 }
1229 
1230 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1231 					      struct lag_tracker *tracker,
1232 					      struct net_device *ndev,
1233 					      struct netdev_notifier_changelowerstate_info *info)
1234 {
1235 	struct netdev_lag_lower_state_info *lag_lower_info;
1236 	int idx;
1237 
1238 	if (!netif_is_lag_port(ndev))
1239 		return 0;
1240 
1241 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1242 	if (idx < 0)
1243 		return 0;
1244 
1245 	/* This information is used to determine virtual to physical
1246 	 * port mapping.
1247 	 */
1248 	lag_lower_info = info->lower_state_info;
1249 	if (!lag_lower_info)
1250 		return 0;
1251 
1252 	tracker->netdev_state[idx] = *lag_lower_info;
1253 
1254 	return 1;
1255 }
1256 
1257 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1258 					    struct lag_tracker *tracker,
1259 					    struct net_device *ndev)
1260 {
1261 	struct net_device *ndev_tmp;
1262 	struct slave *slave;
1263 	bool has_inactive = 0;
1264 	int idx;
1265 
1266 	if (!netif_is_lag_master(ndev))
1267 		return 0;
1268 
1269 	rcu_read_lock();
1270 	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1271 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1272 		if (idx < 0)
1273 			continue;
1274 
1275 		slave = bond_slave_get_rcu(ndev_tmp);
1276 		if (slave)
1277 			has_inactive |= bond_is_slave_inactive(slave);
1278 	}
1279 	rcu_read_unlock();
1280 
1281 	if (tracker->has_inactive == has_inactive)
1282 		return 0;
1283 
1284 	tracker->has_inactive = has_inactive;
1285 
1286 	return 1;
1287 }
1288 
1289 /* this handler is always registered to netdev events */
1290 static int mlx5_lag_netdev_event(struct notifier_block *this,
1291 				 unsigned long event, void *ptr)
1292 {
1293 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1294 	struct lag_tracker tracker;
1295 	struct mlx5_lag *ldev;
1296 	int changed = 0;
1297 
1298 	if (event != NETDEV_CHANGEUPPER &&
1299 	    event != NETDEV_CHANGELOWERSTATE &&
1300 	    event != NETDEV_CHANGEINFODATA)
1301 		return NOTIFY_DONE;
1302 
1303 	ldev    = container_of(this, struct mlx5_lag, nb);
1304 
1305 	tracker = ldev->tracker;
1306 
1307 	switch (event) {
1308 	case NETDEV_CHANGEUPPER:
1309 		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1310 		break;
1311 	case NETDEV_CHANGELOWERSTATE:
1312 		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1313 							     ndev, ptr);
1314 		break;
1315 	case NETDEV_CHANGEINFODATA:
1316 		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1317 		break;
1318 	}
1319 
1320 	ldev->tracker = tracker;
1321 
1322 	if (changed)
1323 		mlx5_queue_bond_work(ldev, 0);
1324 
1325 	return NOTIFY_DONE;
1326 }
1327 
1328 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1329 				struct mlx5_core_dev *dev,
1330 				struct net_device *netdev)
1331 {
1332 	unsigned int fn = mlx5_get_dev_index(dev);
1333 	unsigned long flags;
1334 
1335 	spin_lock_irqsave(&lag_lock, flags);
1336 	ldev->pf[fn].netdev = netdev;
1337 	ldev->tracker.netdev_state[fn].link_up = 0;
1338 	ldev->tracker.netdev_state[fn].tx_enabled = 0;
1339 	spin_unlock_irqrestore(&lag_lock, flags);
1340 }
1341 
1342 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1343 				    struct net_device *netdev)
1344 {
1345 	unsigned long flags;
1346 	int i;
1347 
1348 	spin_lock_irqsave(&lag_lock, flags);
1349 	mlx5_ldev_for_each(i, 0, ldev) {
1350 		if (ldev->pf[i].netdev == netdev) {
1351 			ldev->pf[i].netdev = NULL;
1352 			break;
1353 		}
1354 	}
1355 	spin_unlock_irqrestore(&lag_lock, flags);
1356 }
1357 
1358 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1359 			      struct mlx5_core_dev *dev)
1360 {
1361 	unsigned int fn = mlx5_get_dev_index(dev);
1362 
1363 	ldev->pf[fn].dev = dev;
1364 	dev->priv.lag = ldev;
1365 }
1366 
1367 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1368 				  struct mlx5_core_dev *dev)
1369 {
1370 	int fn;
1371 
1372 	fn = mlx5_get_dev_index(dev);
1373 	if (ldev->pf[fn].dev != dev)
1374 		return;
1375 
1376 	ldev->pf[fn].dev = NULL;
1377 	dev->priv.lag = NULL;
1378 }
1379 
1380 /* Must be called with HCA devcom component lock held */
1381 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1382 {
1383 	struct mlx5_devcom_comp_dev *pos = NULL;
1384 	struct mlx5_lag *ldev = NULL;
1385 	struct mlx5_core_dev *tmp_dev;
1386 
1387 	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
1388 	if (tmp_dev)
1389 		ldev = mlx5_lag_dev(tmp_dev);
1390 
1391 	if (!ldev) {
1392 		ldev = mlx5_lag_dev_alloc(dev);
1393 		if (!ldev) {
1394 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
1395 			return 0;
1396 		}
1397 		mlx5_ldev_add_mdev(ldev, dev);
1398 		return 0;
1399 	}
1400 
1401 	mutex_lock(&ldev->lock);
1402 	if (ldev->mode_changes_in_progress) {
1403 		mutex_unlock(&ldev->lock);
1404 		return -EAGAIN;
1405 	}
1406 	mlx5_ldev_get(ldev);
1407 	mlx5_ldev_add_mdev(ldev, dev);
1408 	mutex_unlock(&ldev->lock);
1409 
1410 	return 0;
1411 }
1412 
1413 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
1414 {
1415 	mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
1416 }
1417 
1418 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
1419 {
1420 	struct mlx5_devcom_match_attr attr = {
1421 		.key.val = mlx5_query_nic_system_image_guid(dev),
1422 		.flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
1423 		.net = mlx5_core_net(dev),
1424 	};
1425 
1426 	/* This component is used to sync adding core_dev to lag_dev and to sync
1427 	 * changes of mlx5_adev_devices between the LAG layer and other layers.
1428 	 */
1429 	dev->priv.hca_devcom_comp =
1430 		mlx5_devcom_register_component(dev->priv.devc,
1431 					       MLX5_DEVCOM_HCA_PORTS,
1432 					       &attr, NULL, dev);
1433 	if (!dev->priv.hca_devcom_comp) {
1434 		mlx5_core_err(dev,
1435 			      "Failed to register devcom HCA component.");
1436 		return -EINVAL;
1437 	}
1438 
1439 	return 0;
1440 }
1441 
1442 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1443 {
1444 	struct mlx5_lag *ldev;
1445 
1446 	ldev = mlx5_lag_dev(dev);
1447 	if (!ldev)
1448 		return;
1449 
1450 	/* mdev is being removed, might as well remove debugfs
1451 	 * as early as possible.
1452 	 */
1453 	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1454 recheck:
1455 	mutex_lock(&ldev->lock);
1456 	if (ldev->mode_changes_in_progress) {
1457 		mutex_unlock(&ldev->lock);
1458 		msleep(100);
1459 		goto recheck;
1460 	}
1461 	mlx5_ldev_remove_mdev(ldev, dev);
1462 	mutex_unlock(&ldev->lock);
1463 	mlx5_lag_unregister_hca_devcom_comp(dev);
1464 	mlx5_ldev_put(ldev);
1465 }
1466 
1467 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1468 {
1469 	int err;
1470 
1471 	if (!mlx5_lag_is_supported(dev))
1472 		return;
1473 
1474 	if (mlx5_lag_register_hca_devcom_comp(dev))
1475 		return;
1476 
1477 recheck:
1478 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1479 	err = __mlx5_lag_dev_add_mdev(dev);
1480 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1481 
1482 	if (err) {
1483 		msleep(100);
1484 		goto recheck;
1485 	}
1486 	mlx5_ldev_add_debugfs(dev);
1487 }
1488 
1489 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1490 			    struct net_device *netdev)
1491 {
1492 	struct mlx5_lag *ldev;
1493 	bool lag_is_active;
1494 
1495 	ldev = mlx5_lag_dev(dev);
1496 	if (!ldev)
1497 		return;
1498 
1499 	mutex_lock(&ldev->lock);
1500 	mlx5_ldev_remove_netdev(ldev, netdev);
1501 	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1502 
1503 	lag_is_active = __mlx5_lag_is_active(ldev);
1504 	mutex_unlock(&ldev->lock);
1505 
1506 	if (lag_is_active)
1507 		mlx5_queue_bond_work(ldev, 0);
1508 }
1509 
1510 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1511 			 struct net_device *netdev)
1512 {
1513 	struct mlx5_lag *ldev;
1514 	int num = 0;
1515 
1516 	ldev = mlx5_lag_dev(dev);
1517 	if (!ldev)
1518 		return;
1519 
1520 	mutex_lock(&ldev->lock);
1521 	mlx5_ldev_add_netdev(ldev, dev, netdev);
1522 	num = mlx5_lag_num_netdevs(ldev);
1523 	if (num >= ldev->ports)
1524 		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1525 	mutex_unlock(&ldev->lock);
1526 	mlx5_queue_bond_work(ldev, 0);
1527 }
1528 
1529 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
1530 {
1531 	int i;
1532 
1533 	for (i = start_idx; i >= end_idx; i--)
1534 		if (ldev->pf[i].dev)
1535 			return i;
1536 	return -1;
1537 }
1538 
1539 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
1540 {
1541 	int i;
1542 
1543 	for (i = start_idx; i < MLX5_MAX_PORTS; i++)
1544 		if (ldev->pf[i].dev)
1545 			return i;
1546 	return MLX5_MAX_PORTS;
1547 }
1548 
1549 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1550 {
1551 	struct mlx5_lag *ldev;
1552 	unsigned long flags;
1553 	bool res;
1554 
1555 	spin_lock_irqsave(&lag_lock, flags);
1556 	ldev = mlx5_lag_dev(dev);
1557 	res  = ldev && __mlx5_lag_is_roce(ldev);
1558 	spin_unlock_irqrestore(&lag_lock, flags);
1559 
1560 	return res;
1561 }
1562 EXPORT_SYMBOL(mlx5_lag_is_roce);
1563 
1564 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1565 {
1566 	struct mlx5_lag *ldev;
1567 	unsigned long flags;
1568 	bool res;
1569 
1570 	spin_lock_irqsave(&lag_lock, flags);
1571 	ldev = mlx5_lag_dev(dev);
1572 	res  = ldev && __mlx5_lag_is_active(ldev);
1573 	spin_unlock_irqrestore(&lag_lock, flags);
1574 
1575 	return res;
1576 }
1577 EXPORT_SYMBOL(mlx5_lag_is_active);
1578 
1579 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1580 {
1581 	struct mlx5_lag *ldev;
1582 	unsigned long flags;
1583 	bool res = 0;
1584 
1585 	spin_lock_irqsave(&lag_lock, flags);
1586 	ldev = mlx5_lag_dev(dev);
1587 	if (ldev)
1588 		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1589 	spin_unlock_irqrestore(&lag_lock, flags);
1590 
1591 	return res;
1592 }
1593 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1594 
1595 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1596 {
1597 	struct mlx5_lag *ldev;
1598 	unsigned long flags;
1599 	bool res = false;
1600 	int idx;
1601 
1602 	spin_lock_irqsave(&lag_lock, flags);
1603 	ldev = mlx5_lag_dev(dev);
1604 	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1605 	res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
1606 	spin_unlock_irqrestore(&lag_lock, flags);
1607 
1608 	return res;
1609 }
1610 EXPORT_SYMBOL(mlx5_lag_is_master);
1611 
1612 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1613 {
1614 	struct mlx5_lag *ldev;
1615 	unsigned long flags;
1616 	bool res;
1617 
1618 	spin_lock_irqsave(&lag_lock, flags);
1619 	ldev = mlx5_lag_dev(dev);
1620 	res  = ldev && __mlx5_lag_is_sriov(ldev);
1621 	spin_unlock_irqrestore(&lag_lock, flags);
1622 
1623 	return res;
1624 }
1625 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1626 
1627 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1628 {
1629 	struct mlx5_lag *ldev;
1630 	unsigned long flags;
1631 	bool res;
1632 
1633 	spin_lock_irqsave(&lag_lock, flags);
1634 	ldev = mlx5_lag_dev(dev);
1635 	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1636 	spin_unlock_irqrestore(&lag_lock, flags);
1637 
1638 	return res;
1639 }
1640 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1641 
1642 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1643 {
1644 	struct mlx5_lag *ldev;
1645 
1646 	ldev = mlx5_lag_dev(dev);
1647 	if (!ldev)
1648 		return;
1649 
1650 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1651 	mutex_lock(&ldev->lock);
1652 
1653 	ldev->mode_changes_in_progress++;
1654 	if (__mlx5_lag_is_active(ldev))
1655 		mlx5_disable_lag(ldev);
1656 
1657 	mutex_unlock(&ldev->lock);
1658 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1659 }
1660 
1661 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1662 {
1663 	struct mlx5_lag *ldev;
1664 
1665 	ldev = mlx5_lag_dev(dev);
1666 	if (!ldev)
1667 		return;
1668 
1669 	mutex_lock(&ldev->lock);
1670 	ldev->mode_changes_in_progress--;
1671 	mutex_unlock(&ldev->lock);
1672 	mlx5_queue_bond_work(ldev, 0);
1673 }
1674 
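/* Map a RoCE bond slave netdev to the physical port its traffic is currently
 * steered to (the first bucket of its v2p mapping). Returns 0 if the device
 * is not part of an active RoCE LAG.
 */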
1675 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1676 			   struct net_device *slave)
1677 {
1678 	struct mlx5_lag *ldev;
1679 	unsigned long flags;
1680 	u8 port = 0;
1681 	int i;
1682 
1683 	spin_lock_irqsave(&lag_lock, flags);
1684 	ldev = mlx5_lag_dev(dev);
1685 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
1686 		goto unlock;
1687 
1688 	mlx5_ldev_for_each(i, 0, ldev) {
1689 		if (ldev->pf[i].netdev == slave) {
1690 			port = i;
1691 			break;
1692 		}
1693 	}
1694 
1695 	port = ldev->v2p_map[port * ldev->buckets];
1696 
1697 unlock:
1698 	spin_unlock_irqrestore(&lag_lock, flags);
1699 	return port;
1700 }
1701 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1702 
1703 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1704 {
1705 	struct mlx5_lag *ldev;
1706 
1707 	ldev = mlx5_lag_dev(dev);
1708 	if (!ldev)
1709 		return 0;
1710 
1711 	return ldev->ports;
1712 }
1713 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1714 
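/* Iterate over the other devices in the LAG. *i is the slot to resume the
 * search from and is advanced past the returned device; NULL means there
 * are no more peers. A caller would typically loop along these lines
 * (sketch; use_peer() is a placeholder):
 *
 *	int i = 0;
 *	struct mlx5_core_dev *peer;
 *
 *	while ((peer = mlx5_lag_get_next_peer_mdev(dev, &i)))
 *		use_peer(peer);
 */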
1715 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
1716 {
1717 	struct mlx5_core_dev *peer_dev = NULL;
1718 	struct mlx5_lag *ldev;
1719 	unsigned long flags;
1720 	int idx;
1721 
1722 	spin_lock_irqsave(&lag_lock, flags);
1723 	ldev = mlx5_lag_dev(dev);
1724 	if (!ldev)
1725 		goto unlock;
1726 
1727 	if (*i == MLX5_MAX_PORTS)
1728 		goto unlock;
1729 	mlx5_ldev_for_each(idx, *i, ldev)
1730 		if (ldev->pf[idx].dev != dev)
1731 			break;
1732 
1733 	if (idx == MLX5_MAX_PORTS) {
1734 		*i = idx;
1735 		goto unlock;
1736 	}
1737 	*i = idx + 1;
1738 
1739 	peer_dev = ldev->pf[idx].dev;
1740 
1741 unlock:
1742 	spin_unlock_irqrestore(&lag_lock, flags);
1743 	return peer_dev;
1744 }
1745 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
1746 
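/* Sum the QUERY_CONG_STATISTICS fields found at the given byte offsets over
 * all ports of an active LAG (or just this device when no LAG is active).
 * values[] is zeroed before accumulation; num_counters entries are filled.
 */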
1747 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1748 				 u64 *values,
1749 				 int num_counters,
1750 				 size_t *offsets)
1751 {
1752 	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1753 	struct mlx5_core_dev **mdev;
1754 	int ret = 0, i, j, idx = 0;
1755 	struct mlx5_lag *ldev;
1756 	unsigned long flags;
1757 	int num_ports;
1758 	void *out;
1759 
1760 	out = kvzalloc(outlen, GFP_KERNEL);
1761 	if (!out)
1762 		return -ENOMEM;
1763 
1764 	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1765 	if (!mdev) {
1766 		ret = -ENOMEM;
1767 		goto free_out;
1768 	}
1769 
1770 	memset(values, 0, sizeof(*values) * num_counters);
1771 
1772 	spin_lock_irqsave(&lag_lock, flags);
1773 	ldev = mlx5_lag_dev(dev);
1774 	if (ldev && __mlx5_lag_is_active(ldev)) {
1775 		num_ports = ldev->ports;
1776 		mlx5_ldev_for_each(i, 0, ldev)
1777 			mdev[idx++] = ldev->pf[i].dev;
1778 	} else {
1779 		num_ports = 1;
1780 		mdev[MLX5_LAG_P1] = dev;
1781 	}
1782 	spin_unlock_irqrestore(&lag_lock, flags);
1783 
1784 	for (i = 0; i < num_ports; ++i) {
1785 		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1786 
1787 		MLX5_SET(query_cong_statistics_in, in, opcode,
1788 			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1789 		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1790 					  out);
1791 		if (ret)
1792 			goto free_mdev;
1793 
1794 		for (j = 0; j < num_counters; ++j)
1795 			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1796 	}
1797 
1798 free_mdev:
1799 	kvfree(mdev);
1800 free_out:
1801 	kvfree(out);
1802 	return ret;
1803 }
1804 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1805