xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c (revision b9c8fc2caea6ff7e45c6942de8fee53515c66b34)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/mlx5.h"
39 #include "lib/devcom.h"
40 #include "mlx5_core.h"
41 #include "eswitch.h"
42 #include "esw/acl/ofld.h"
43 #include "lag.h"
44 #include "mp.h"
45 #include "mpesw.h"
46 
47 
48 /* General purpose, use for short periods of time.
49  * Beware of lock dependencies (preferably, no locks should be acquired
50  * under it).
51  */
52 static DEFINE_SPINLOCK(lag_lock);
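/* For illustration, the short-critical-section pattern the query helpers
 * below follow (e.g. mlx5_lag_is_roce()):
 *
 *	spin_lock_irqsave(&lag_lock, flags);
 *	ldev = mlx5_lag_dev(dev);
 *	res  = ldev && __mlx5_lag_is_roce(ldev);
 *	spin_unlock_irqrestore(&lag_lock, flags);
 *
 * No other lock is acquired while lag_lock is held.
 */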
53 
54 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
55 {
56 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
57 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
58 
59 	if (mode == MLX5_LAG_MODE_MPESW)
60 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
61 
62 	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
63 }
64 
65 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
66 {
67 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
68 	u8 active_port = 0;
69 	int num_enabled;
70 	int idx;
71 
72 	mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
73 			      &num_enabled);
74 	for (idx = 0; idx < num_enabled; idx++)
75 		active_port |= BIT_MASK(enabled_ports[idx]);
76 
77 	return active_port;
78 }
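/* For illustration: if the tracker reports only the ports at indices 0
 * and 2 as TX-enabled, enabled_ports = {0, 2} and the returned mask is
 * BIT_MASK(0) | BIT_MASK(2) == 0x5.
 */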
79 
80 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
81 			       int mode, unsigned long flags)
82 {
83 	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
84 				     &flags);
85 	int port_sel_mode = get_port_sel_mode(mode, flags);
86 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
87 	u8 *ports = ldev->v2p_map;
88 	int idx0, idx1;
89 	void *lag_ctx;
90 
91 	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
92 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
93 	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
94 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
95 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
96 
97 	if (idx0 < 0 || idx1 < 0)
98 		return -EINVAL;
99 
100 	switch (port_sel_mode) {
101 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
102 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
103 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
104 		break;
105 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
106 		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
107 			break;
108 
109 		MLX5_SET(lagc, lag_ctx, active_port,
110 			 lag_active_port_bits(mlx5_lag_dev(dev)));
111 		break;
112 	default:
113 		break;
114 	}
115 	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
116 
117 	return mlx5_cmd_exec_in(dev, create_lag, in);
118 }
119 
120 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
121 			       u8 *ports)
122 {
123 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
124 	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
125 	int idx0, idx1;
126 
127 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
128 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
129 	if (idx0 < 0 || idx1 < 0)
130 		return -EINVAL;
131 
132 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
133 	MLX5_SET(modify_lag_in, in, field_select, 0x1);
134 
135 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
136 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
137 
138 	return mlx5_cmd_exec_in(dev, modify_lag, in);
139 }
140 
141 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
142 {
143 	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
144 
145 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
146 
147 	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
148 }
149 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
150 
151 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
152 {
153 	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
154 
155 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
156 
157 	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
158 }
159 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
160 
161 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
162 				   u8 *ports, int *num_disabled)
163 {
164 	int i;
165 
166 	*num_disabled = 0;
167 	mlx5_ldev_for_each(i, 0, ldev)
168 		if (!tracker->netdev_state[i].tx_enabled ||
169 		    !tracker->netdev_state[i].link_up)
170 			ports[(*num_disabled)++] = i;
171 }
172 
173 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
174 			   u8 *ports, int *num_enabled)
175 {
176 	int i;
177 
178 	*num_enabled = 0;
179 	mlx5_ldev_for_each(i, 0, ldev)
180 		if (tracker->netdev_state[i].tx_enabled &&
181 		    tracker->netdev_state[i].link_up)
182 			ports[(*num_enabled)++] = i;
183 
184 	if (*num_enabled == 0)
185 		mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
186 }
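/* For illustration: on a 2-port device with port 0 up and TX-enabled and
 * port 1 down, this reports {0}; if both ports are down it falls back to
 * mlx5_infer_tx_disabled() and reports {0, 1}, so callers always get a
 * non-empty port list to map to.
 */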
187 
188 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
189 				   struct mlx5_lag *ldev,
190 				   struct lag_tracker *tracker,
191 				   unsigned long flags)
192 {
193 	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
194 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
195 	int written = 0;
196 	int num_enabled;
197 	int idx;
198 	int err;
199 	int i;
200 	int j;
201 
202 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
203 		mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
204 				      &num_enabled);
205 		for (i = 0; i < num_enabled; i++) {
206 			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
207 			if (err != 3)
208 				return;
209 			written += err;
210 		}
211 		buf[written - 2] = 0;
212 		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
213 	} else {
214 		mlx5_ldev_for_each(i, 0, ldev) {
215 			for (j = 0; j < ldev->buckets; j++) {
216 				idx = i * ldev->buckets + j;
217 				err = scnprintf(buf + written, 10,
218 						" port %d:%d", i + 1, ldev->v2p_map[idx]);
219 				if (err != 9)
220 					return;
221 				written += err;
222 			}
223 		}
224 		mlx5_core_info(dev, "lag map:%s\n", buf);
225 	}
226 }
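/* Sample output (illustrative, 2-port device with native mapping): the
 * queue-affinity branch prints "lag map: port 1:1 port 2:2", while the
 * hash-based branch prints "lag map active ports: 1, 2".
 */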
227 
228 static int mlx5_lag_netdev_event(struct notifier_block *this,
229 				 unsigned long event, void *ptr);
230 static void mlx5_do_bond_work(struct work_struct *work);
231 
232 static void mlx5_ldev_free(struct kref *ref)
233 {
234 	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
235 	struct net *net;
236 	int i;
237 
238 	if (ldev->nb.notifier_call) {
239 		net = read_pnet(&ldev->net);
240 		unregister_netdevice_notifier_net(net, &ldev->nb);
241 	}
242 
243 	mlx5_ldev_for_each(i, 0, ldev) {
244 		if (ldev->pf[i].dev &&
245 		    ldev->pf[i].port_change_nb.nb.notifier_call) {
246 			struct mlx5_nb *nb = &ldev->pf[i].port_change_nb;
247 
248 			mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb);
249 		}
250 	}
251 
252 	mlx5_lag_mp_cleanup(ldev);
253 	cancel_delayed_work_sync(&ldev->bond_work);
254 	cancel_work_sync(&ldev->speed_update_work);
255 	destroy_workqueue(ldev->wq);
256 	mutex_destroy(&ldev->lock);
257 	kfree(ldev);
258 }
259 
260 static void mlx5_ldev_put(struct mlx5_lag *ldev)
261 {
262 	kref_put(&ldev->ref, mlx5_ldev_free);
263 }
264 
265 static void mlx5_ldev_get(struct mlx5_lag *ldev)
266 {
267 	kref_get(&ldev->ref);
268 }
269 
270 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
271 {
272 	struct mlx5_lag *ldev;
273 	int err;
274 
275 	ldev = kzalloc_obj(*ldev);
276 	if (!ldev)
277 		return NULL;
278 
279 	ldev->wq = create_singlethread_workqueue("mlx5_lag");
280 	if (!ldev->wq) {
281 		kfree(ldev);
282 		return NULL;
283 	}
284 
285 	kref_init(&ldev->ref);
286 	mutex_init(&ldev->lock);
287 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
288 	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
289 
290 	ldev->nb.notifier_call = mlx5_lag_netdev_event;
291 	write_pnet(&ldev->net, mlx5_core_net(dev));
292 	if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
293 		ldev->nb.notifier_call = NULL;
294 		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
295 	}
296 	ldev->mode = MLX5_LAG_MODE_NONE;
297 
298 	err = mlx5_lag_mp_init(ldev);
299 	if (err)
300 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
301 			      err);
302 
303 	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
304 	ldev->buckets = 1;
305 
306 	return ldev;
307 }
308 
309 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
310 				struct net_device *ndev)
311 {
312 	int i;
313 
314 	mlx5_ldev_for_each(i, 0, ldev)
315 		if (ldev->pf[i].netdev == ndev)
316 			return i;
317 
318 	return -ENOENT;
319 }
320 
321 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
322 {
323 	int i, num = 0;
324 
325 	if (!ldev)
326 		return -ENOENT;
327 
328 	mlx5_ldev_for_each(i, 0, ldev) {
329 		if (num == seq)
330 			return i;
331 		num++;
332 	}
333 	return -ENOENT;
334 }
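/* For illustration: with only pf[] slots 1 and 3 populated, seq 0 maps
 * to index 1 and seq 1 maps to index 3; any larger seq yields -ENOENT.
 */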
335 
336 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
337 {
338 	int i, num = 0;
339 
340 	if (!ldev)
341 		return 0;
342 
343 	mlx5_ldev_for_each(i, 0, ldev) {
344 		(void)i;
345 		num++;
346 	}
347 	return num;
348 }
349 
350 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
351 {
352 	int i, num = 0;
353 
354 	if (!ldev)
355 		return 0;
356 
357 	mlx5_ldev_for_each(i, 0, ldev)
358 		if (ldev->pf[i].netdev)
359 			num++;
360 	return num;
361 }
362 
363 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
364 {
365 	return ldev->mode == MLX5_LAG_MODE_ROCE;
366 }
367 
368 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
369 {
370 	return ldev->mode == MLX5_LAG_MODE_SRIOV;
371 }
372 
373 /* Create a mapping between steering slots and active ports.
374  * Since there are ldev->buckets slots per port, first assume the
375  * native mapping should be used.
376  * If some ports are disabled, fill their slots with a mapping that
377  * points to active ports.
378  */
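/* Worked example (2 ports, 2 buckets): the native map is {1, 1, 2, 2}.
 * If port 2 loses link, each of its buckets is repointed at a randomly
 * chosen active port, yielding {1, 1, 1, 1}.
 */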
379 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
380 					   struct mlx5_lag *ldev,
381 					   u8 buckets,
382 					   u8 *ports)
383 {
384 	int disabled[MLX5_MAX_PORTS] = {};
385 	int enabled[MLX5_MAX_PORTS] = {};
386 	int disabled_ports_num = 0;
387 	int enabled_ports_num = 0;
388 	int idx;
389 	u32 rand;
390 	int i;
391 	int j;
392 
393 	mlx5_ldev_for_each(i, 0, ldev) {
394 		if (tracker->netdev_state[i].tx_enabled &&
395 		    tracker->netdev_state[i].link_up)
396 			enabled[enabled_ports_num++] = i;
397 		else
398 			disabled[disabled_ports_num++] = i;
399 	}
400 
401 	/* Use native mapping by default where each port's buckets
402 	 * point to the native port: 1 1 1 ... 1 2 2 2 ... 2 3 3 3 ... 3 etc
403 	 */
404 	mlx5_ldev_for_each(i, 0, ldev) {
405 		for (j = 0; j < buckets; j++) {
406 			idx = i * buckets + j;
407 			ports[idx] = i + 1;
408 		}
409 	}
410 
411 	/* If all ports are enabled or all are disabled, keep the native mapping */
412 	if (enabled_ports_num == ldev->ports ||
413 	    disabled_ports_num == ldev->ports)
414 		return;
415 
416 	/* Go over the disabled ports and for each assign a random active port */
417 	for (i = 0; i < disabled_ports_num; i++) {
418 		for (j = 0; j < buckets; j++) {
419 			get_random_bytes(&rand, 4);
420 			ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
421 		}
422 	}
423 }
424 
425 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
426 {
427 	int i;
428 
429 	mlx5_ldev_for_each(i, 0, ldev)
430 		if (ldev->pf[i].has_drop)
431 			return true;
432 	return false;
433 }
434 
435 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
436 {
437 	int i;
438 
439 	mlx5_ldev_for_each(i, 0, ldev) {
440 		if (!ldev->pf[i].has_drop)
441 			continue;
442 
443 		mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
444 							     MLX5_VPORT_UPLINK);
445 		ldev->pf[i].has_drop = false;
446 	}
447 }
448 
449 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
450 				     struct lag_tracker *tracker)
451 {
452 	u8 disabled_ports[MLX5_MAX_PORTS] = {};
453 	struct mlx5_core_dev *dev;
454 	int disabled_index;
455 	int num_disabled;
456 	int err;
457 	int i;
458 
459 	/* First delete the current drop rule so there won't be any dropped
460 	 * packets
461 	 */
462 	mlx5_lag_drop_rule_cleanup(ldev);
463 
464 	if (!ldev->tracker.has_inactive)
465 		return;
466 
467 	mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
468 
469 	for (i = 0; i < num_disabled; i++) {
470 		disabled_index = disabled_ports[i];
471 		dev = ldev->pf[disabled_index].dev;
472 		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
473 								  MLX5_VPORT_UPLINK);
474 		if (!err)
475 			ldev->pf[disabled_index].has_drop = true;
476 		else
477 			mlx5_core_err(dev,
478 				      "Failed to create lag drop rule, error: %d", err);
479 	}
480 }
481 
482 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
483 {
484 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
485 	void *lag_ctx;
486 
487 	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
488 
489 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
490 	MLX5_SET(modify_lag_in, in, field_select, 0x2);
491 
492 	MLX5_SET(lagc, lag_ctx, active_port, ports);
493 
494 	return mlx5_cmd_exec_in(dev, modify_lag, in);
495 }
496 
497 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
498 {
499 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
500 	struct mlx5_core_dev *dev0;
501 	u8 active_ports;
502 	int ret;
503 
504 	if (idx < 0)
505 		return -EINVAL;
506 
507 	dev0 = ldev->pf[idx].dev;
508 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
509 		ret = mlx5_lag_port_sel_modify(ldev, ports);
510 		if (ret ||
511 		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
512 			return ret;
513 
514 		active_ports = lag_active_port_bits(ldev);
515 
516 		return mlx5_cmd_modify_active_port(dev0, active_ports);
517 	}
518 	return mlx5_cmd_modify_lag(dev0, ldev, ports);
519 }
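/* Flow sketch for the hash-based branch above: the port selection flow
 * table is reprogrammed first; if the device also exposes
 * port_select_flow_table_bypass, the refreshed active-port bitmask is
 * pushed via MODIFY_LAG as well, presumably so firmware can steer around
 * inactive ports without consulting the flow table.
 */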
520 
521 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
522 {
523 	struct net_device *ndev = NULL;
524 	struct mlx5_lag *ldev;
525 	unsigned long flags;
526 	int i, last_idx;
527 
528 	spin_lock_irqsave(&lag_lock, flags);
529 	ldev = mlx5_lag_dev(dev);
530 
531 	if (!ldev)
532 		goto unlock;
533 
534 	mlx5_ldev_for_each(i, 0, ldev)
535 		if (ldev->tracker.netdev_state[i].tx_enabled)
536 			ndev = ldev->pf[i].netdev;
537 	if (!ndev) {
538 		last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
539 		if (last_idx < 0)
540 			goto unlock;
541 		ndev = ldev->pf[last_idx].netdev;
542 	}
543 
544 	dev_hold(ndev);
545 
546 unlock:
547 	spin_unlock_irqrestore(&lag_lock, flags);
548 
549 	return ndev;
550 }
551 
552 void mlx5_modify_lag(struct mlx5_lag *ldev,
553 		     struct lag_tracker *tracker)
554 {
555 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
556 	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
557 	struct mlx5_core_dev *dev0;
558 	int idx;
559 	int err;
560 	int i;
561 	int j;
562 
563 	if (first_idx < 0)
564 		return;
565 
566 	dev0 = ldev->pf[first_idx].dev;
567 	mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
568 
569 	mlx5_ldev_for_each(i, 0, ldev) {
570 		for (j = 0; j < ldev->buckets; j++) {
571 			idx = i * ldev->buckets + j;
572 			if (ports[idx] == ldev->v2p_map[idx])
573 				continue;
574 			err = _mlx5_modify_lag(ldev, ports);
575 			if (err) {
576 				mlx5_core_err(dev0,
577 					      "Failed to modify LAG (%d)\n",
578 					      err);
579 				return;
580 			}
581 			memcpy(ldev->v2p_map, ports, sizeof(ports));
582 
583 			mlx5_lag_print_mapping(dev0, ldev, tracker,
584 					       ldev->mode_flags);
585 			break;
586 		}
587 	}
588 
589 	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
590 		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
591 
592 		if (ldev->mode != MLX5_LAG_MODE_ROCE)
593 			mlx5_lag_drop_rule_setup(ldev, tracker);
594 		/* Only sriov and roce lag should have tracker->tx_type set,
595 		 * so there is no need to check the mode.
596 		 */
597 		blocking_notifier_call_chain(&dev0->priv.lag_nh,
598 					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
599 					     ndev);
600 		dev_put(ndev);
601 	}
602 }
603 
604 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
605 				      enum mlx5_lag_mode mode,
606 				      unsigned long *flags)
607 {
608 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
609 	struct mlx5_core_dev *dev0;
610 
611 	if (first_idx < 0)
612 		return -EINVAL;
613 
614 	if (mode == MLX5_LAG_MODE_MPESW ||
615 	    mode == MLX5_LAG_MODE_MULTIPATH)
616 		return 0;
617 
618 	dev0 = ldev->pf[first_idx].dev;
619 
620 	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
621 		if (ldev->ports > 2)
622 			return -EINVAL;
623 		return 0;
624 	}
625 
626 	if (ldev->ports > 2)
627 		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
628 
629 	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
630 
631 	return 0;
632 }
633 
634 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
635 			      struct lag_tracker *tracker, bool shared_fdb,
636 			      unsigned long *flags)
637 {
638 	*flags = 0;
639 	if (shared_fdb) {
640 		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
641 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
642 	}
643 
644 	if (mode == MLX5_LAG_MODE_MPESW)
645 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
646 
647 	return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
648 }
649 
650 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
651 {
652 	int port_sel_mode = get_port_sel_mode(mode, flags);
653 
654 	switch (port_sel_mode) {
655 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
656 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
657 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
658 	default: return "invalid";
659 	}
660 }
661 
662 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
663 {
664 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
665 	struct mlx5_eswitch *master_esw;
666 	struct mlx5_core_dev *dev0;
667 	int i, j;
668 	int err;
669 
670 	if (first_idx < 0)
671 		return -EINVAL;
672 
673 	dev0 = ldev->pf[first_idx].dev;
674 	master_esw = dev0->priv.eswitch;
675 	mlx5_ldev_for_each(i, first_idx + 1, ldev) {
676 		struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
677 
678 		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
679 							       slave_esw, ldev->ports);
680 		if (err)
681 			goto err;
682 	}
683 	return 0;
684 err:
685 	mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
686 		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
687 							 ldev->pf[j].dev->priv.eswitch);
688 	return err;
689 }
690 
691 static int mlx5_create_lag(struct mlx5_lag *ldev,
692 			   struct lag_tracker *tracker,
693 			   enum mlx5_lag_mode mode,
694 			   unsigned long flags)
695 {
696 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
697 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
698 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
699 	struct mlx5_core_dev *dev0;
700 	int err;
701 
702 	if (first_idx < 0)
703 		return -EINVAL;
704 
705 	dev0 = ldev->pf[first_idx].dev;
706 	if (tracker)
707 		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
708 	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
709 		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
710 
711 	err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
712 	if (err) {
713 		mlx5_core_err(dev0,
714 			      "Failed to create LAG (%d)\n",
715 			      err);
716 		return err;
717 	}
718 
719 	if (shared_fdb) {
720 		err = mlx5_lag_create_single_fdb(ldev);
721 		if (err)
722 			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
723 		else
724 			mlx5_core_info(dev0, "Operation mode is single FDB\n");
725 	}
726 
727 	if (err) {
728 		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
729 		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
730 			mlx5_core_err(dev0,
731 				      "Failed to deactivate RoCE LAG; driver restart required\n");
732 	}
733 	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
734 
735 	return err;
736 }
737 
738 int mlx5_activate_lag(struct mlx5_lag *ldev,
739 		      struct lag_tracker *tracker,
740 		      enum mlx5_lag_mode mode,
741 		      bool shared_fdb)
742 {
743 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
744 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
745 	struct mlx5_core_dev *dev0;
746 	unsigned long flags = 0;
747 	int err;
748 
749 	if (first_idx < 0)
750 		return -EINVAL;
751 
752 	dev0 = ldev->pf[first_idx].dev;
753 	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
754 	if (err)
755 		return err;
756 
757 	if (mode != MLX5_LAG_MODE_MPESW) {
758 		mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
759 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
760 			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
761 						       ldev->v2p_map);
762 			if (err) {
763 				mlx5_core_err(dev0,
764 					      "Failed to create LAG port selection(%d)\n",
765 					      err);
766 				return err;
767 			}
768 		}
769 	}
770 
771 	err = mlx5_create_lag(ldev, tracker, mode, flags);
772 	if (err) {
773 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
774 			mlx5_lag_port_sel_destroy(ldev);
775 		if (roce_lag)
776 			mlx5_core_err(dev0,
777 				      "Failed to activate RoCE LAG\n");
778 		else
779 			mlx5_core_err(dev0,
780 				      "Failed to activate VF LAG\n"
781 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
782 		return err;
783 	}
784 
785 	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
786 	    !roce_lag)
787 		mlx5_lag_drop_rule_setup(ldev, tracker);
788 
789 	ldev->mode = mode;
790 	ldev->mode_flags = flags;
791 	return 0;
792 }
793 
794 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
795 {
796 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
797 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
798 	bool roce_lag = __mlx5_lag_is_roce(ldev);
799 	unsigned long flags = ldev->mode_flags;
800 	struct mlx5_eswitch *master_esw;
801 	struct mlx5_core_dev *dev0;
802 	int err;
803 	int i;
804 
805 	if (first_idx < 0)
806 		return -EINVAL;
807 
808 	dev0 = ldev->pf[first_idx].dev;
809 	master_esw = dev0->priv.eswitch;
810 	ldev->mode = MLX5_LAG_MODE_NONE;
811 	ldev->mode_flags = 0;
812 	mlx5_lag_mp_reset(ldev);
813 
814 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
815 		mlx5_ldev_for_each(i, first_idx + 1, ldev)
816 			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
817 								 ldev->pf[i].dev->priv.eswitch);
818 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
819 	}
820 
821 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
822 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
823 	if (err) {
824 		if (roce_lag) {
825 			mlx5_core_err(dev0,
826 				      "Failed to deactivate RoCE LAG; driver restart required\n");
827 		} else {
828 			mlx5_core_err(dev0,
829 				      "Failed to deactivate VF LAG; driver restart required\n"
830 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
831 		}
832 		return err;
833 	}
834 
835 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
836 		mlx5_lag_port_sel_destroy(ldev);
837 		ldev->buckets = 1;
838 	}
839 	if (mlx5_lag_has_drop_rule(ldev))
840 		mlx5_lag_drop_rule_cleanup(ldev);
841 
842 	return 0;
843 }
844 
845 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
846 {
847 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
848 #ifdef CONFIG_MLX5_ESWITCH
849 	struct mlx5_core_dev *dev;
850 	u8 mode;
851 #endif
852 	bool roce_support;
853 	int i;
854 
855 	if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
856 		return false;
857 
858 #ifdef CONFIG_MLX5_ESWITCH
859 	mlx5_ldev_for_each(i, 0, ldev) {
860 		dev = ldev->pf[i].dev;
861 		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
862 			return false;
863 	}
864 
865 	dev = ldev->pf[first_idx].dev;
866 	mode = mlx5_eswitch_mode(dev);
867 	mlx5_ldev_for_each(i, 0, ldev)
868 		if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
869 			return false;
870 
871 #else
872 	mlx5_ldev_for_each(i, 0, ldev)
873 		if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
874 			return false;
875 #endif
876 	roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
877 	mlx5_ldev_for_each(i, first_idx + 1, ldev)
878 		if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
879 			return false;
880 
881 	return true;
882 }
883 
884 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
885 {
886 	int i;
887 
888 	mlx5_ldev_for_each(i, 0, ldev) {
889 		if (ldev->pf[i].dev->priv.flags &
890 		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
891 			continue;
892 
893 		ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
894 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
895 	}
896 }
897 
898 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
899 {
900 	int i;
901 
902 	mlx5_ldev_for_each(i, 0, ldev) {
903 		if (ldev->pf[i].dev->priv.flags &
904 		    MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
905 			continue;
906 
907 		ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
908 		mlx5_rescan_drivers_locked(ldev->pf[i].dev);
909 	}
910 }
911 
912 void mlx5_disable_lag(struct mlx5_lag *ldev)
913 {
914 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
915 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
916 	struct mlx5_core_dev *dev0;
917 	bool roce_lag;
918 	int err;
919 	int i;
920 
921 	if (idx < 0)
922 		return;
923 
924 	dev0 = ldev->pf[idx].dev;
925 	roce_lag = __mlx5_lag_is_roce(ldev);
926 
927 	if (shared_fdb) {
928 		mlx5_lag_remove_devices(ldev);
929 	} else if (roce_lag) {
930 		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
931 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
932 			mlx5_rescan_drivers_locked(dev0);
933 		}
934 		mlx5_ldev_for_each(i, idx + 1, ldev)
935 			mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
936 	}
937 
938 	err = mlx5_deactivate_lag(ldev);
939 	if (err)
940 		return;
941 
942 	if (shared_fdb || roce_lag)
943 		mlx5_lag_add_devices(ldev);
944 
945 	if (shared_fdb)
946 		mlx5_ldev_for_each(i, 0, ldev)
947 			if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
948 				mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
949 }
950 
951 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
952 {
953 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
954 	struct mlx5_core_dev *dev;
955 	int i;
956 
957 	if (idx < 0)
958 		return false;
959 
960 	mlx5_ldev_for_each(i, idx + 1, ldev) {
961 		dev = ldev->pf[i].dev;
962 		if (is_mdev_switchdev_mode(dev) &&
963 		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
964 		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
965 		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
966 		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
967 		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
968 			continue;
969 		return false;
970 	}
971 
972 	dev = ldev->pf[idx].dev;
973 	if (is_mdev_switchdev_mode(dev) &&
974 	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
975 	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
976 	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
977 	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
978 		return true;
979 
980 	return false;
981 }
982 
983 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
984 {
985 	bool roce_lag = true;
986 	int i;
987 
988 	mlx5_ldev_for_each(i, 0, ldev)
989 		roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
990 
991 #ifdef CONFIG_MLX5_ESWITCH
992 	mlx5_ldev_for_each(i, 0, ldev)
993 		roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
994 #endif
995 
996 	return roce_lag;
997 }
998 
999 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
1000 {
1001 	return do_bond && __mlx5_lag_is_active(ldev) &&
1002 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1003 }
1004 
1005 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
1006 {
1007 	return !do_bond && __mlx5_lag_is_active(ldev) &&
1008 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1009 }
1010 
1011 #ifdef CONFIG_MLX5_ESWITCH
1012 static int
1013 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
1014 			   int (*get_speed)(struct mlx5_core_dev *, u32 *))
1015 {
1016 	struct mlx5_core_dev *pf_mdev;
1017 	int pf_idx;
1018 	u32 speed;
1019 	int ret;
1020 
1021 	*sum_speed = 0;
1022 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1023 		pf_mdev = ldev->pf[pf_idx].dev;
1024 		if (!pf_mdev)
1025 			continue;
1026 
1027 		ret = get_speed(pf_mdev, &speed);
1028 		if (ret) {
1029 			mlx5_core_dbg(pf_mdev,
1030 				      "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
1031 				      get_speed, dev_name(pf_mdev->device),
1032 				      ret);
1033 			return ret;
1034 		}
1035 
1036 		*sum_speed += speed;
1037 	}
1038 
1039 	return 0;
1040 }
1041 
1042 static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
1043 {
1044 	return mlx5_lag_sum_devices_speed(ldev, max_speed,
1045 					  mlx5_port_max_linkspeed);
1046 }
1047 
1048 static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
1049 					   u32 *oper_speed)
1050 {
1051 	return mlx5_lag_sum_devices_speed(ldev, oper_speed,
1052 					  mlx5_port_oper_linkspeed);
1053 }
1054 
1055 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
1056 						u32 speed)
1057 {
1058 	u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
1059 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
1060 	struct mlx5_vport *vport;
1061 	unsigned long i;
1062 	int ret;
1063 
1064 	if (!esw)
1065 		return;
1066 
1067 	if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
1068 		return;
1069 
1070 	mlx5_esw_for_each_vport(esw, i, vport) {
1071 		if (!vport)
1072 			continue;
1073 
1074 		if (vport->vport == MLX5_VPORT_UPLINK)
1075 			continue;
1076 
1077 		ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
1078 						     vport->vport, true, speed);
1079 		if (ret)
1080 			mlx5_core_dbg(mdev,
1081 				      "Failed to set vport %d speed %d, err=%d\n",
1082 				      vport->vport, speed, ret);
1083 	}
1084 }
1085 
1086 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
1087 {
1088 	struct mlx5_core_dev *mdev;
1089 	u32 speed;
1090 	int pf_idx;
1091 
1092 	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
1093 		if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
1094 			return;
1095 	} else {
1096 		speed = ldev->tracker.bond_speed_mbps;
1097 		if (speed == SPEED_UNKNOWN)
1098 			return;
1099 	}
1100 
1101 	/* If speed is not set, use the sum of max speeds of all PFs */
1102 	if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
1103 		return;
1104 
1105 	speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1106 
1107 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1108 		mdev = ldev->pf[pf_idx].dev;
1109 		if (!mdev)
1110 			continue;
1111 
1112 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1113 	}
1114 }
1115 
1116 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
1117 {
1118 	struct mlx5_core_dev *mdev;
1119 	u32 speed;
1120 	int pf_idx;
1121 	int ret;
1122 
1123 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1124 		mdev = ldev->pf[pf_idx].dev;
1125 		if (!mdev)
1126 			continue;
1127 
1128 		ret = mlx5_port_oper_linkspeed(mdev, &speed);
1129 		if (ret) {
1130 			mlx5_core_dbg(mdev,
1131 				      "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
1132 				      dev_name(mdev->device), ret);
1133 			continue;
1134 		}
1135 
1136 		speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1137 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1138 	}
1139 }
1140 #endif
1141 
1142 static void mlx5_do_bond(struct mlx5_lag *ldev)
1143 {
1144 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1145 	struct lag_tracker tracker = { };
1146 	struct mlx5_core_dev *dev0;
1147 	struct net_device *ndev;
1148 	bool do_bond, roce_lag;
1149 	int err;
1150 	int i;
1151 
1152 	if (idx < 0)
1153 		return;
1154 
1155 	dev0 = ldev->pf[idx].dev;
1156 	if (!mlx5_lag_is_ready(ldev)) {
1157 		do_bond = false;
1158 	} else {
1159 		/* VF LAG is in multipath mode, ignore bond change requests */
1160 		if (mlx5_lag_is_multipath(dev0))
1161 			return;
1162 
1163 		tracker = ldev->tracker;
1164 
1165 		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1166 	}
1167 
1168 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
1169 		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1170 
1171 		roce_lag = mlx5_lag_is_roce_lag(ldev);
1172 
1173 		if (shared_fdb || roce_lag)
1174 			mlx5_lag_remove_devices(ldev);
1175 
1176 		err = mlx5_activate_lag(ldev, &tracker,
1177 					roce_lag ? MLX5_LAG_MODE_ROCE :
1178 						   MLX5_LAG_MODE_SRIOV,
1179 					shared_fdb);
1180 		if (err) {
1181 			if (shared_fdb || roce_lag)
1182 				mlx5_lag_add_devices(ldev);
1183 			if (shared_fdb) {
1184 				mlx5_ldev_for_each(i, 0, ldev)
1185 					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1186 			}
1187 
1188 			return;
1189 		} else if (roce_lag) {
1190 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1191 			mlx5_rescan_drivers_locked(dev0);
1192 			mlx5_ldev_for_each(i, idx + 1, ldev) {
1193 				if (mlx5_get_roce_state(ldev->pf[i].dev))
1194 					mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
1195 			}
1196 		} else if (shared_fdb) {
1197 			int i;
1198 
1199 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1200 			mlx5_rescan_drivers_locked(dev0);
1201 
1202 			mlx5_ldev_for_each(i, 0, ldev) {
1203 				err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1204 				if (err)
1205 					break;
1206 			}
1207 
1208 			if (err) {
1209 				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1210 				mlx5_rescan_drivers_locked(dev0);
1211 				mlx5_deactivate_lag(ldev);
1212 				mlx5_lag_add_devices(ldev);
1213 				mlx5_ldev_for_each(i, 0, ldev)
1214 					mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1215 				mlx5_core_err(dev0, "Failed to enable lag\n");
1216 				return;
1217 			}
1218 		}
1219 		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1220 			ndev = mlx5_lag_active_backup_get_netdev(dev0);
1221 			/* Only sriov and roce lag should have tracker->tx_type
1222 			 * set, so there is no need to check the mode.
1223 			 */
1224 			blocking_notifier_call_chain(&dev0->priv.lag_nh,
1225 						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1226 						     ndev);
1227 			dev_put(ndev);
1228 		}
1229 		mlx5_lag_set_vports_agg_speed(ldev);
1230 	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1231 		mlx5_modify_lag(ldev, &tracker);
1232 		mlx5_lag_set_vports_agg_speed(ldev);
1233 	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1234 		mlx5_lag_reset_vports_speed(ldev);
1235 		mlx5_disable_lag(ldev);
1236 	}
1237 }
1238 
1239 /* The last mdev to unregister will destroy the workqueue before removing the
1240  * devcom component, and as all the mdevs use the same devcom component we are
1241  * guaranteed that the devcom is valid while the calling work is running.
1242  */
1243 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1244 {
1245 	struct mlx5_devcom_comp_dev *devcom = NULL;
1246 	int i;
1247 
1248 	mutex_lock(&ldev->lock);
1249 	i = mlx5_get_next_ldev_func(ldev, 0);
1250 	if (i < MLX5_MAX_PORTS)
1251 		devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
1252 	mutex_unlock(&ldev->lock);
1253 	return devcom;
1254 }
1255 
1256 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1257 {
1258 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1259 }
1260 
1261 static void mlx5_do_bond_work(struct work_struct *work)
1262 {
1263 	struct delayed_work *delayed_work = to_delayed_work(work);
1264 	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1265 					     bond_work);
1266 	struct mlx5_devcom_comp_dev *devcom;
1267 	int status;
1268 
1269 	devcom = mlx5_lag_get_devcom_comp(ldev);
1270 	if (!devcom)
1271 		return;
1272 
1273 	status = mlx5_devcom_comp_trylock(devcom);
1274 	if (!status) {
1275 		mlx5_queue_bond_work(ldev, HZ);
1276 		return;
1277 	}
1278 
1279 	mutex_lock(&ldev->lock);
1280 	if (ldev->mode_changes_in_progress) {
1281 		mutex_unlock(&ldev->lock);
1282 		mlx5_devcom_comp_unlock(devcom);
1283 		mlx5_queue_bond_work(ldev, HZ);
1284 		return;
1285 	}
1286 
1287 	mlx5_do_bond(ldev);
1288 	mutex_unlock(&ldev->lock);
1289 	mlx5_devcom_comp_unlock(devcom);
1290 }
1291 
1292 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1293 					 struct lag_tracker *tracker,
1294 					 struct netdev_notifier_changeupper_info *info)
1295 {
1296 	struct net_device *upper = info->upper_dev, *ndev_tmp;
1297 	struct netdev_lag_upper_info *lag_upper_info = NULL;
1298 	bool is_bonded, is_in_lag, mode_supported;
1299 	bool has_inactive = false;
1300 	struct slave *slave;
1301 	u8 bond_status = 0;
1302 	int num_slaves = 0;
1303 	int changed = 0;
1304 	int i, idx = -1;
1305 
1306 	if (!netif_is_lag_master(upper))
1307 		return 0;
1308 
1309 	if (info->linking)
1310 		lag_upper_info = info->upper_info;
1311 
1312 	/* The event may still be of interest if the slave does not belong to
1313 	 * us, but is enslaved to a master which has one or more of our netdevs
1314 	 * as slaves (e.g., if a new slave is added to a master that bonds two
1315 	 * of our netdevs, we should unbond).
1316 	 */
1317 	rcu_read_lock();
1318 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1319 		mlx5_ldev_for_each(i, 0, ldev) {
1320 			if (ldev->pf[i].netdev == ndev_tmp) {
1321 				idx++;
1322 				break;
1323 			}
1324 		}
1325 		if (i < MLX5_MAX_PORTS) {
1326 			slave = bond_slave_get_rcu(ndev_tmp);
1327 			if (slave)
1328 				has_inactive |= bond_is_slave_inactive(slave);
1329 			bond_status |= (1 << idx);
1330 		}
1331 
1332 		num_slaves++;
1333 	}
1334 	rcu_read_unlock();
1335 
1336 	/* None of this lagdev's netdevs are slaves of this master. */
1337 	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1338 		return 0;
1339 
1340 	if (lag_upper_info) {
1341 		tracker->tx_type = lag_upper_info->tx_type;
1342 		tracker->hash_type = lag_upper_info->hash_type;
1343 	}
1344 
1345 	tracker->has_inactive = has_inactive;
1346 	/* Determine bonding status:
1347 	 * A device is considered bonded if both its physical ports are slaves
1348 	 * of the same lag master, and no other devices are enslaved to it.
1349 	 */
1350 	is_in_lag = num_slaves == ldev->ports &&
1351 		bond_status == GENMASK(ldev->ports - 1, 0);
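	/* E.g. (illustrative, 2-port device): both of our netdevs enslaved
	 * to the same bond with no foreign slaves gives num_slaves == 2 and
	 * bond_status == 0x3, so is_in_lag is true; one extra foreign slave
	 * (num_slaves == 3) would break it.
	 */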
1352 
1353 	/* Lag mode must be activebackup or hash. */
1354 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1355 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1356 
1357 	is_bonded = is_in_lag && mode_supported;
1358 	if (tracker->is_bonded != is_bonded) {
1359 		tracker->is_bonded = is_bonded;
1360 		changed = 1;
1361 	}
1362 
1363 	if (!is_in_lag)
1364 		return changed;
1365 
1366 	if (!mlx5_lag_is_ready(ldev))
1367 		NL_SET_ERR_MSG_MOD(info->info.extack,
1368 				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
1369 	else if (!mode_supported)
1370 		NL_SET_ERR_MSG_MOD(info->info.extack,
1371 				   "Can't activate LAG offload, TX type isn't supported");
1372 
1373 	return changed;
1374 }
1375 
1376 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1377 					      struct lag_tracker *tracker,
1378 					      struct net_device *ndev,
1379 					      struct netdev_notifier_changelowerstate_info *info)
1380 {
1381 	struct netdev_lag_lower_state_info *lag_lower_info;
1382 	int idx;
1383 
1384 	if (!netif_is_lag_port(ndev))
1385 		return 0;
1386 
1387 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1388 	if (idx < 0)
1389 		return 0;
1390 
1391 	/* This information is used to determine virtual to physical
1392 	 * port mapping.
1393 	 */
1394 	lag_lower_info = info->lower_state_info;
1395 	if (!lag_lower_info)
1396 		return 0;
1397 
1398 	tracker->netdev_state[idx] = *lag_lower_info;
1399 
1400 	return 1;
1401 }
1402 
1403 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1404 					    struct lag_tracker *tracker,
1405 					    struct net_device *ndev)
1406 {
1407 	struct net_device *ndev_tmp;
1408 	struct slave *slave;
1409 	bool has_inactive = false;
1410 	int idx;
1411 
1412 	if (!netif_is_lag_master(ndev))
1413 		return 0;
1414 
1415 	rcu_read_lock();
1416 	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1417 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1418 		if (idx < 0)
1419 			continue;
1420 
1421 		slave = bond_slave_get_rcu(ndev_tmp);
1422 		if (slave)
1423 			has_inactive |= bond_is_slave_inactive(slave);
1424 	}
1425 	rcu_read_unlock();
1426 
1427 	if (tracker->has_inactive == has_inactive)
1428 		return 0;
1429 
1430 	tracker->has_inactive = has_inactive;
1431 
1432 	return 1;
1433 }
1434 
1435 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
1436 					  struct net_device *ndev)
1437 {
1438 	struct ethtool_link_ksettings lksettings;
1439 	struct net_device *bond_dev;
1440 	int err;
1441 
1442 	if (netif_is_lag_master(ndev))
1443 		bond_dev = ndev;
1444 	else
1445 		bond_dev = netdev_master_upper_dev_get(ndev);
1446 
1447 	if (!bond_dev) {
1448 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
1449 		return;
1450 	}
1451 
1452 	err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
1453 	if (err) {
1454 		netdev_dbg(bond_dev,
1455 			   "Failed to get speed for bond dev %s, err=%d\n",
1456 			   bond_dev->name, err);
1457 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
1458 		return;
1459 	}
1460 
1461 	if (lksettings.base.speed == SPEED_UNKNOWN)
1462 		tracker->bond_speed_mbps = 0;
1463 	else
1464 		tracker->bond_speed_mbps = lksettings.base.speed;
1465 }
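/* Note the two "unknown" encodings above: a missing bond device or an
 * ethtool failure stores SPEED_UNKNOWN, which makes
 * mlx5_lag_set_vports_agg_speed() bail out, while a bond that reports
 * SPEED_UNKNOWN is stored as 0, which triggers the fallback there to the
 * sum of the PFs' max speeds.
 */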
1466 
1467 /* Returns speed in Mbps. */
1468 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
1469 {
1470 	struct mlx5_lag *ldev;
1471 	unsigned long flags;
1472 	int ret = 0;
1473 
1474 	spin_lock_irqsave(&lag_lock, flags);
1475 	ldev = mlx5_lag_dev(mdev);
1476 	if (!ldev) {
1477 		ret = -ENODEV;
1478 		goto unlock;
1479 	}
1480 
1481 	*speed = ldev->tracker.bond_speed_mbps;
1482 
1483 	if (*speed == SPEED_UNKNOWN) {
1484 		mlx5_core_dbg(mdev, "Bond speed is unknown\n");
1485 		ret = -EINVAL;
1486 	}
1487 
1488 unlock:
1489 	spin_unlock_irqrestore(&lag_lock, flags);
1490 	return ret;
1491 }
1492 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
1493 
1494 /* this handler is always registered to netdev events */
1495 static int mlx5_lag_netdev_event(struct notifier_block *this,
1496 				 unsigned long event, void *ptr)
1497 {
1498 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1499 	struct lag_tracker tracker;
1500 	struct mlx5_lag *ldev;
1501 	int changed = 0;
1502 
1503 	if (event != NETDEV_CHANGEUPPER &&
1504 	    event != NETDEV_CHANGELOWERSTATE &&
1505 	    event != NETDEV_CHANGEINFODATA)
1506 		return NOTIFY_DONE;
1507 
1508 	ldev    = container_of(this, struct mlx5_lag, nb);
1509 
1510 	tracker = ldev->tracker;
1511 
1512 	switch (event) {
1513 	case NETDEV_CHANGEUPPER:
1514 		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1515 		break;
1516 	case NETDEV_CHANGELOWERSTATE:
1517 		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1518 							     ndev, ptr);
1519 		break;
1520 	case NETDEV_CHANGEINFODATA:
1521 		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1522 		break;
1523 	}
1524 
1525 	if (changed)
1526 		mlx5_lag_update_tracker_speed(&tracker, ndev);
1527 
1528 	ldev->tracker = tracker;
1529 
1530 	if (changed)
1531 		mlx5_queue_bond_work(ldev, 0);
1532 
1533 	return NOTIFY_DONE;
1534 }
1535 
1536 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1537 				struct mlx5_core_dev *dev,
1538 				struct net_device *netdev)
1539 {
1540 	unsigned int fn = mlx5_get_dev_index(dev);
1541 	unsigned long flags;
1542 
1543 	spin_lock_irqsave(&lag_lock, flags);
1544 	ldev->pf[fn].netdev = netdev;
1545 	ldev->tracker.netdev_state[fn].link_up = 0;
1546 	ldev->tracker.netdev_state[fn].tx_enabled = 0;
1547 	spin_unlock_irqrestore(&lag_lock, flags);
1548 }
1549 
1550 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1551 				    struct net_device *netdev)
1552 {
1553 	unsigned long flags;
1554 	int i;
1555 
1556 	spin_lock_irqsave(&lag_lock, flags);
1557 	mlx5_ldev_for_each(i, 0, ldev) {
1558 		if (ldev->pf[i].netdev == netdev) {
1559 			ldev->pf[i].netdev = NULL;
1560 			break;
1561 		}
1562 	}
1563 	spin_unlock_irqrestore(&lag_lock, flags);
1564 }
1565 
1566 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1567 			      struct mlx5_core_dev *dev)
1568 {
1569 	unsigned int fn = mlx5_get_dev_index(dev);
1570 
1571 	ldev->pf[fn].dev = dev;
1572 	dev->priv.lag = ldev;
1573 
1574 	MLX5_NB_INIT(&ldev->pf[fn].port_change_nb,
1575 		     mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
1576 	mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb);
1577 }
1578 
1579 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1580 				  struct mlx5_core_dev *dev)
1581 {
1582 	int fn;
1583 
1584 	fn = mlx5_get_dev_index(dev);
1585 	if (ldev->pf[fn].dev != dev)
1586 		return;
1587 
1588 	if (ldev->pf[fn].port_change_nb.nb.notifier_call)
1589 		mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb);
1590 
1591 	ldev->pf[fn].dev = NULL;
1592 	dev->priv.lag = NULL;
1593 }
1594 
1595 /* Must be called with HCA devcom component lock held */
1596 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1597 {
1598 	struct mlx5_devcom_comp_dev *pos = NULL;
1599 	struct mlx5_lag *ldev = NULL;
1600 	struct mlx5_core_dev *tmp_dev;
1601 
1602 	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
1603 	if (tmp_dev)
1604 		ldev = mlx5_lag_dev(tmp_dev);
1605 
1606 	if (!ldev) {
1607 		ldev = mlx5_lag_dev_alloc(dev);
1608 		if (!ldev) {
1609 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
1610 			return 0;
1611 		}
1612 		mlx5_ldev_add_mdev(ldev, dev);
1613 		return 0;
1614 	}
1615 
1616 	mutex_lock(&ldev->lock);
1617 	if (ldev->mode_changes_in_progress) {
1618 		mutex_unlock(&ldev->lock);
1619 		return -EAGAIN;
1620 	}
1621 	mlx5_ldev_get(ldev);
1622 	mlx5_ldev_add_mdev(ldev, dev);
1623 	mutex_unlock(&ldev->lock);
1624 
1625 	return 0;
1626 }
1627 
1628 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
1629 {
1630 	mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
1631 	dev->priv.hca_devcom_comp = NULL;
1632 }
1633 
1634 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
1635 {
1636 	struct mlx5_devcom_match_attr attr = {
1637 		.flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
1638 		.net = mlx5_core_net(dev),
1639 	};
1640 	u8 len __always_unused;
1641 
1642 	mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
1643 
1644 	/* This component is used to sync adding core_dev to lag_dev and to sync
1645 	 * changes of mlx5_adev_devices between the LAG layer and other layers.
1646 	 */
1647 	dev->priv.hca_devcom_comp =
1648 		mlx5_devcom_register_component(dev->priv.devc,
1649 					       MLX5_DEVCOM_HCA_PORTS,
1650 					       &attr, NULL, dev);
1651 	if (!dev->priv.hca_devcom_comp) {
1652 		mlx5_core_err(dev,
1653 			      "Failed to register devcom HCA component.");
1654 		return -EINVAL;
1655 	}
1656 
1657 	return 0;
1658 }
1659 
1660 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1661 {
1662 	struct mlx5_lag *ldev;
1663 
1664 	ldev = mlx5_lag_dev(dev);
1665 	if (!ldev)
1666 		return;
1667 
1668 	/* mdev is being removed, might as well remove debugfs
1669 	 * as early as possible.
1670 	 */
1671 	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1672 recheck:
1673 	mutex_lock(&ldev->lock);
1674 	if (ldev->mode_changes_in_progress) {
1675 		mutex_unlock(&ldev->lock);
1676 		msleep(100);
1677 		goto recheck;
1678 	}
1679 	mlx5_ldev_remove_mdev(ldev, dev);
1680 	mutex_unlock(&ldev->lock);
1681 	mlx5_lag_unregister_hca_devcom_comp(dev);
1682 	mlx5_ldev_put(ldev);
1683 }
1684 
1685 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1686 {
1687 	int err;
1688 
1689 	if (!mlx5_lag_is_supported(dev))
1690 		return;
1691 
1692 	if (mlx5_lag_register_hca_devcom_comp(dev))
1693 		return;
1694 
1695 recheck:
1696 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1697 	err = __mlx5_lag_dev_add_mdev(dev);
1698 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1699 
1700 	if (err) {
1701 		msleep(100);
1702 		goto recheck;
1703 	}
1704 	mlx5_ldev_add_debugfs(dev);
1705 }
1706 
1707 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1708 			    struct net_device *netdev)
1709 {
1710 	struct mlx5_lag *ldev;
1711 	bool lag_is_active;
1712 
1713 	ldev = mlx5_lag_dev(dev);
1714 	if (!ldev)
1715 		return;
1716 
1717 	mutex_lock(&ldev->lock);
1718 	mlx5_ldev_remove_netdev(ldev, netdev);
1719 	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1720 
1721 	lag_is_active = __mlx5_lag_is_active(ldev);
1722 	mutex_unlock(&ldev->lock);
1723 
1724 	if (lag_is_active)
1725 		mlx5_queue_bond_work(ldev, 0);
1726 }
1727 
1728 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1729 			 struct net_device *netdev)
1730 {
1731 	struct mlx5_lag *ldev;
1732 	int num = 0;
1733 
1734 	ldev = mlx5_lag_dev(dev);
1735 	if (!ldev)
1736 		return;
1737 
1738 	mutex_lock(&ldev->lock);
1739 	mlx5_ldev_add_netdev(ldev, dev, netdev);
1740 	num = mlx5_lag_num_netdevs(ldev);
1741 	if (num >= ldev->ports)
1742 		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1743 	mutex_unlock(&ldev->lock);
1744 	mlx5_queue_bond_work(ldev, 0);
1745 }
1746 
1747 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
1748 {
1749 	int i;
1750 
1751 	for (i = start_idx; i >= end_idx; i--)
1752 		if (ldev->pf[i].dev)
1753 			return i;
1754 	return -1;
1755 }
1756 
1757 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
1758 {
1759 	int i;
1760 
1761 	for (i = start_idx; i < MLX5_MAX_PORTS; i++)
1762 		if (ldev->pf[i].dev)
1763 			return i;
1764 	return MLX5_MAX_PORTS;
1765 }
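/* These two helpers likely back the mlx5_ldev_for_each*() iterators used
 * throughout this file (assumed; the macros are defined in lag.h). For
 * illustration, with pf[] slots 1 and 3 populated:
 * mlx5_get_next_ldev_func(ldev, 0) returns 1, (ldev, 2) returns 3, and
 * (ldev, 4) returns MLX5_MAX_PORTS to terminate the walk.
 */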
1766 
1767 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1768 {
1769 	struct mlx5_lag *ldev;
1770 	unsigned long flags;
1771 	bool res;
1772 
1773 	spin_lock_irqsave(&lag_lock, flags);
1774 	ldev = mlx5_lag_dev(dev);
1775 	res  = ldev && __mlx5_lag_is_roce(ldev);
1776 	spin_unlock_irqrestore(&lag_lock, flags);
1777 
1778 	return res;
1779 }
1780 EXPORT_SYMBOL(mlx5_lag_is_roce);
1781 
1782 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1783 {
1784 	struct mlx5_lag *ldev;
1785 	unsigned long flags;
1786 	bool res;
1787 
1788 	spin_lock_irqsave(&lag_lock, flags);
1789 	ldev = mlx5_lag_dev(dev);
1790 	res  = ldev && __mlx5_lag_is_active(ldev);
1791 	spin_unlock_irqrestore(&lag_lock, flags);
1792 
1793 	return res;
1794 }
1795 EXPORT_SYMBOL(mlx5_lag_is_active);
1796 
1797 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1798 {
1799 	struct mlx5_lag *ldev;
1800 	unsigned long flags;
1801 	bool res = false;
1802 
1803 	spin_lock_irqsave(&lag_lock, flags);
1804 	ldev = mlx5_lag_dev(dev);
1805 	if (ldev)
1806 		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1807 	spin_unlock_irqrestore(&lag_lock, flags);
1808 
1809 	return res;
1810 }
1811 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1812 
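/* A device is the LAG "master" when the LAG is active and the device is
 * the PF occupying the first populated port slot.
 */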
1813 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1814 {
1815 	struct mlx5_lag *ldev;
1816 	unsigned long flags;
1817 	bool res = false;
1818 	int idx;
1819 
1820 	spin_lock_irqsave(&lag_lock, flags);
1821 	ldev = mlx5_lag_dev(dev);
1822 	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1823 	res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
1824 	spin_unlock_irqrestore(&lag_lock, flags);
1825 
1826 	return res;
1827 }
1828 EXPORT_SYMBOL(mlx5_lag_is_master);
1829 
1830 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1831 {
1832 	struct mlx5_lag *ldev;
1833 	unsigned long flags;
1834 	bool res;
1835 
1836 	spin_lock_irqsave(&lag_lock, flags);
1837 	ldev = mlx5_lag_dev(dev);
1838 	res  = ldev && __mlx5_lag_is_sriov(ldev);
1839 	spin_unlock_irqrestore(&lag_lock, flags);
1840 
1841 	return res;
1842 }
1843 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1844 
1845 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1846 {
1847 	struct mlx5_lag *ldev;
1848 	unsigned long flags;
1849 	bool res;
1850 
1851 	spin_lock_irqsave(&lag_lock, flags);
1852 	ldev = mlx5_lag_dev(dev);
1853 	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1854 	spin_unlock_irqrestore(&lag_lock, flags);
1855 
1856 	return res;
1857 }
1858 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1859 
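/* mlx5_lag_disable_change()/mlx5_lag_enable_change() bracket operations
 * that must not race with LAG mode transitions. The
 * mode_changes_in_progress counter taken here is the one polled by
 * mlx5_lag_remove_mdev() above; disabling also destroys an active LAG
 * (or MPESW configuration) first, and enabling requeues the bond work so
 * the LAG can be rebuilt if conditions still hold.
 */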
1860 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1861 {
1862 	struct mlx5_lag *ldev;
1863 
1864 	ldev = mlx5_lag_dev(dev);
1865 	if (!ldev)
1866 		return;
1867 
1868 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1869 	mutex_lock(&ldev->lock);
1870 
1871 	ldev->mode_changes_in_progress++;
1872 	if (__mlx5_lag_is_active(ldev)) {
1873 		if (ldev->mode == MLX5_LAG_MODE_MPESW)
1874 			mlx5_lag_disable_mpesw(ldev);
1875 		else
1876 			mlx5_disable_lag(ldev);
1877 	}
1878 
1879 	mutex_unlock(&ldev->lock);
1880 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1881 }
1882 
1883 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1884 {
1885 	struct mlx5_lag *ldev;
1886 
1887 	ldev = mlx5_lag_dev(dev);
1888 	if (!ldev)
1889 		return;
1890 
1891 	mutex_lock(&ldev->lock);
1892 	ldev->mode_changes_in_progress--;
1893 	mutex_unlock(&ldev->lock);
1894 	mlx5_queue_bond_work(ldev, 0);
1895 }
1896 
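/* Map a slave netdev to the physical port currently carrying its traffic:
 * find the slave's port index, then translate it through the first bucket
 * of the port-to-port (v2p) map. Only meaningful in RoCE LAG mode;
 * returns 0 otherwise.
 */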
1897 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1898 			   struct net_device *slave)
1899 {
1900 	struct mlx5_lag *ldev;
1901 	unsigned long flags;
1902 	u8 port = 0;
1903 	int i;
1904 
1905 	spin_lock_irqsave(&lag_lock, flags);
1906 	ldev = mlx5_lag_dev(dev);
1907 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
1908 		goto unlock;
1909 
1910 	mlx5_ldev_for_each(i, 0, ldev) {
1911 		if (ldev->pf[i].netdev == slave) {
1912 			port = i;
1913 			break;
1914 		}
1915 	}
1916 
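	/* Translate through the first bucket of this port's v2p entries;
	 * note that if the netdev was not found above, port 0's mapping
	 * is returned.
	 */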
1917 	port = ldev->v2p_map[port * ldev->buckets];
1918 
1919 unlock:
1920 	spin_unlock_irqrestore(&lag_lock, flags);
1921 	return port;
1922 }
1923 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1924 
1925 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1926 {
1927 	struct mlx5_lag *ldev;
1928 
1929 	ldev = mlx5_lag_dev(dev);
1930 	if (!ldev)
1931 		return 0;
1932 
1933 	return ldev->ports;
1934 }
1935 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1936 
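/* Iterate over this device's LAG peers: *i is the caller's cursor (start
 * at 0); each call returns the next member mdev that is not @dev, or NULL
 * once the ports are exhausted. A minimal usage sketch (an upstream
 * mlx5_lag_for_each_peer_mdev() wrapper is assumed to exist, not shown
 * here, and handle_peer() is hypothetical):
 *
 *   struct mlx5_core_dev *peer;
 *   int i = 0;
 *
 *   while ((peer = mlx5_lag_get_next_peer_mdev(dev, &i)))
 *           handle_peer(peer);
 */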
1937 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
1938 {
1939 	struct mlx5_core_dev *peer_dev = NULL;
1940 	struct mlx5_lag *ldev;
1941 	unsigned long flags;
1942 	int idx;
1943 
1944 	spin_lock_irqsave(&lag_lock, flags);
1945 	ldev = mlx5_lag_dev(dev);
1946 	if (!ldev)
1947 		goto unlock;
1948 
1949 	if (*i == MLX5_MAX_PORTS)
1950 		goto unlock;
1951 	mlx5_ldev_for_each(idx, *i, ldev)
1952 		if (ldev->pf[idx].dev != dev)
1953 			break;
1954 
1955 	if (idx == MLX5_MAX_PORTS) {
1956 		*i = idx;
1957 		goto unlock;
1958 	}
1959 	*i = idx + 1;
1960 
1961 	peer_dev = ldev->pf[idx].dev;
1962 
1963 unlock:
1964 	spin_unlock_irqrestore(&lag_lock, flags);
1965 	return peer_dev;
1966 }
1967 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
1968 
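/* Aggregate congestion counters across the LAG: snapshot the member mdevs
 * under lag_lock, then issue QUERY_CONG_STATISTICS to each member outside
 * the lock and sum the big-endian counters found at the caller-supplied
 * offsets into @values. Falls back to querying only @dev when no LAG is
 * active.
 */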
1969 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1970 				 u64 *values,
1971 				 int num_counters,
1972 				 size_t *offsets)
1973 {
1974 	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1975 	struct mlx5_core_dev **mdev;
1976 	int ret = 0, i, j, idx = 0;
1977 	struct mlx5_lag *ldev;
1978 	unsigned long flags;
1979 	int num_ports;
1980 	void *out;
1981 
1982 	out = kvzalloc(outlen, GFP_KERNEL);
1983 	if (!out)
1984 		return -ENOMEM;
1985 
1986 	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1987 	if (!mdev) {
1988 		ret = -ENOMEM;
1989 		goto free_out;
1990 	}
1991 
1992 	memset(values, 0, sizeof(*values) * num_counters);
1993 
1994 	spin_lock_irqsave(&lag_lock, flags);
1995 	ldev = mlx5_lag_dev(dev);
1996 	if (ldev && __mlx5_lag_is_active(ldev)) {
1997 		num_ports = ldev->ports;
1998 		mlx5_ldev_for_each(i, 0, ldev)
1999 			mdev[idx++] = ldev->pf[i].dev;
2000 	} else {
2001 		num_ports = 1;
2002 		mdev[MLX5_LAG_P1] = dev;
2003 	}
2004 	spin_unlock_irqrestore(&lag_lock, flags);
2005 
2006 	for (i = 0; i < num_ports; ++i) {
2007 		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
2008 
2009 		MLX5_SET(query_cong_statistics_in, in, opcode,
2010 			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
2011 		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
2012 					  out);
2013 		if (ret)
2014 			goto free_mdev;
2015 
2016 		for (j = 0; j < num_counters; ++j)
2017 			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
2018 	}
2019 
2020 free_mdev:
2021 	kvfree(mdev);
2022 free_out:
2023 	kvfree(out);
2024 	return ret;
2025 }
2026 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
2027