xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c (revision 68c2dd59a6c784637d53d0bba22b4e118be7e225)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include <linux/mlx5/lag.h>
39 #include "lib/mlx5.h"
40 #include "lib/devcom.h"
41 #include "mlx5_core.h"
42 #include "eswitch.h"
43 #include "esw/acl/ofld.h"
44 #include "lag.h"
45 #include "mp.h"
46 #include "mpesw.h"
47 
48 
/* General-purpose lock; hold it only for short periods of time.
 * Beware of lock dependencies (preferably, no other lock should be
 * acquired while holding it).
 */
static DEFINE_SPINLOCK(lag_lock);
54 
55 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
56 {
57 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
58 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
59 
60 	if (mode == MLX5_LAG_MODE_MPESW)
61 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
62 
63 	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
64 }
65 
66 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
67 {
68 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
69 	u8 active_port = 0;
70 	int num_enabled;
71 	int idx;
72 
73 	mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
74 			      &num_enabled);
75 	for (idx = 0; idx < num_enabled; idx++)
76 		active_port |= BIT_MASK(enabled_ports[idx]);
77 
78 	return active_port;
79 }
80 
81 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
82 			       int mode, unsigned long flags)
83 {
84 	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
85 				     &flags);
86 	int port_sel_mode = get_port_sel_mode(mode, flags);
87 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
88 	u8 *ports = ldev->v2p_map;
89 	int idx0, idx1;
90 	void *lag_ctx;
91 
92 	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
93 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
94 	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
95 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
96 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
97 
98 	if (idx0 < 0 || idx1 < 0)
99 		return -EINVAL;
100 
101 	switch (port_sel_mode) {
102 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
103 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
104 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
105 		break;
106 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
107 		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
108 			break;
109 
110 		MLX5_SET(lagc, lag_ctx, active_port,
111 			 lag_active_port_bits(mlx5_lag_dev(dev)));
112 		break;
113 	default:
114 		break;
115 	}
116 	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
117 
118 	return mlx5_cmd_exec_in(dev, create_lag, in);
119 }
120 
121 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
122 			       u8 *ports)
123 {
124 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
125 	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
126 	int idx0, idx1;
127 
128 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
129 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
130 	if (idx0 < 0 || idx1 < 0)
131 		return -EINVAL;
132 
133 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
134 	MLX5_SET(modify_lag_in, in, field_select, 0x1);
135 
136 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
137 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
138 
139 	return mlx5_cmd_exec_in(dev, modify_lag, in);
140 }
141 
142 static u32 mlx5_lag_dev_group_id(struct mlx5_core_dev *dev)
143 {
144 	struct mlx5_lag *ldev = mlx5_lag_dev(dev);
145 	struct lag_func *pf;
146 	int i;
147 
148 	if (!ldev)
149 		return 0;
150 
151 	mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
152 		pf = mlx5_lag_pf(ldev, i);
153 		if (pf->dev == dev)
154 			return pf->sd_fdb_active ? pf->group_id : 0;
155 	}
156 	return 0;
157 }
158 
/* Report the result of mlx5_lag_is_sd() for @dev. The vport LAG commands
 * in this file use it to choose between the driver-side helpers and the
 * firmware command.
 */
static int mlx5_lag_is_sw_lag(struct mlx5_core_dev *dev)
{
	int is_sd = mlx5_lag_is_sd(dev);

	return is_sd;
}
163 
164 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
165 {
166 	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
167 	struct mlx5_lag *ldev = mlx5_lag_dev(dev);
168 	int ret;
169 
170 	if (mlx5_lag_is_sw_lag(dev)) {
171 		if (!ldev)
172 			return -ENODEV;
173 
174 		mutex_lock(&ldev->lock);
175 		ret = mlx5_lag_create_vport_lag(mlx5_lag_dev(dev),
176 						mlx5_lag_dev_group_id(dev));
177 		mutex_unlock(&ldev->lock);
178 		return ret;
179 	}
180 
181 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
182 
183 	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
184 }
185 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
186 
187 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
188 {
189 	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
190 	struct mlx5_lag *ldev = mlx5_lag_dev(dev);
191 
192 	if (mlx5_lag_is_sw_lag(dev)) {
193 		if (!ldev)
194 			return 0;
195 
196 		mutex_lock(&ldev->lock);
197 		mlx5_lag_destroy_vport_lag(mlx5_lag_dev(dev),
198 					   mlx5_lag_dev_group_id(dev));
199 		mutex_unlock(&ldev->lock);
200 		return 0;
201 	}
202 
203 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
204 
205 	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
206 }
207 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
208 
209 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
210 				   u8 *ports, int *num_disabled)
211 {
212 	int i;
213 
214 	*num_disabled = 0;
215 	mlx5_ldev_for_each(i, 0, ldev)
216 		if (!tracker->netdev_state[i].tx_enabled ||
217 		    !tracker->netdev_state[i].link_up)
218 			ports[(*num_disabled)++] = i;
219 }
220 
221 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
222 			   u8 *ports, int *num_enabled)
223 {
224 	int i;
225 
226 	*num_enabled = 0;
227 	mlx5_ldev_for_each(i, 0, ldev)
228 		if (tracker->netdev_state[i].tx_enabled &&
229 		    tracker->netdev_state[i].link_up)
230 			ports[(*num_enabled)++] = i;
231 
232 	if (*num_enabled == 0)
233 		mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
234 }
235 
236 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
237 				   struct mlx5_lag *ldev,
238 				   struct lag_tracker *tracker,
239 				   unsigned long flags)
240 {
241 	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
242 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
243 	int written = 0;
244 	int num_enabled;
245 	int idx;
246 	int err;
247 	int i;
248 	int j;
249 
250 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
251 		mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
252 				      &num_enabled);
253 		for (i = 0; i < num_enabled; i++) {
254 			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
255 			if (err != 3)
256 				return;
257 			written += err;
258 		}
259 		buf[written - 2] = 0;
260 		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
261 	} else {
262 		mlx5_ldev_for_each(i, 0, ldev) {
263 			for (j  = 0; j < ldev->buckets; j++) {
264 				idx = i * ldev->buckets + j;
265 				err = scnprintf(buf + written, 10,
266 						" port %d:%d", i + 1, ldev->v2p_map[idx]);
267 				if (err != 9)
268 					return;
269 				written += err;
270 			}
271 		}
272 		mlx5_core_info(dev, "lag map:%s\n", buf);
273 	}
274 }
275 
276 static int mlx5_lag_netdev_event(struct notifier_block *this,
277 				 unsigned long event, void *ptr);
278 static void mlx5_do_bond_work(struct work_struct *work);
279 
280 static void mlx5_ldev_free(struct kref *ref)
281 {
282 	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
283 	struct lag_func *pf;
284 	struct net *net;
285 	int i;
286 
287 	if (ldev->nb.notifier_call) {
288 		net = read_pnet(&ldev->net);
289 		unregister_netdevice_notifier_net(net, &ldev->nb);
290 	}
291 
292 	mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
293 		pf = mlx5_lag_pf(ldev, i);
294 		if (pf->port_change_nb.nb.notifier_call) {
295 			struct mlx5_nb *nb = &pf->port_change_nb;
296 
297 			mlx5_eq_notifier_unregister(pf->dev, nb);
298 		}
299 		xa_erase(&ldev->pfs, i);
300 		kfree(pf);
301 	}
302 	xa_destroy(&ldev->pfs);
303 
304 	mlx5_lag_mp_cleanup(ldev);
305 	cancel_delayed_work_sync(&ldev->bond_work);
306 	cancel_work_sync(&ldev->speed_update_work);
307 	destroy_workqueue(ldev->wq);
308 	mutex_destroy(&ldev->lock);
309 	kfree(ldev);
310 }
311 
312 static void mlx5_ldev_put(struct mlx5_lag *ldev)
313 {
314 	kref_put(&ldev->ref, mlx5_ldev_free);
315 }
316 
317 static void mlx5_ldev_get(struct mlx5_lag *ldev)
318 {
319 	kref_get(&ldev->ref);
320 }
321 
322 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
323 {
324 	struct mlx5_lag *ldev;
325 	int err;
326 
327 	ldev = kzalloc_obj(*ldev);
328 	if (!ldev)
329 		return NULL;
330 
331 	ldev->wq = create_singlethread_workqueue("mlx5_lag");
332 	if (!ldev->wq) {
333 		kfree(ldev);
334 		return NULL;
335 	}
336 
337 	kref_init(&ldev->ref);
338 	mutex_init(&ldev->lock);
339 	xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
340 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
341 	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
342 
343 	if (!mlx5_sd_is_supported(dev)) {
344 		ldev->nb.notifier_call = mlx5_lag_netdev_event;
345 		write_pnet(&ldev->net, mlx5_core_net(dev));
346 		if (register_netdevice_notifier_net(read_pnet(&ldev->net),
347 						    &ldev->nb)) {
348 			ldev->nb.notifier_call = NULL;
349 			mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
350 		}
351 	}
352 	ldev->mode = MLX5_LAG_MODE_NONE;
353 
354 	err = mlx5_lag_mp_init(ldev);
355 	if (err)
356 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
357 			      err);
358 
359 	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
360 	ldev->buckets = 1;
361 
362 	return ldev;
363 }
364 
365 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
366 				struct net_device *ndev)
367 {
368 	struct lag_func *pf;
369 	int i;
370 
371 	mlx5_ldev_for_each(i, 0, ldev) {
372 		pf = mlx5_lag_pf(ldev, i);
373 		if (pf->netdev == ndev)
374 			return i;
375 	}
376 
377 	return -ENOENT;
378 }
379 
380 static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
381 {
382 	unsigned long idx = 0;
383 	void *entry;
384 
385 	if (!ldev)
386 		return -ENOENT;
387 
388 	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
389 	if (!entry)
390 		return -ENOENT;
391 
392 	return (int)idx;
393 }
394 
395 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
396 {
397 	int master_idx, i, num = 0;
398 
399 	if (!ldev)
400 		return -ENOENT;
401 
402 	master_idx = mlx5_lag_get_master_idx(ldev);
403 
404 	/* If seq 0 is requested and there's a primary PF, return it */
405 	if (master_idx >= 0) {
406 		if (seq == 0)
407 			return master_idx;
408 		num++;
409 	}
410 
411 	mlx5_ldev_for_each(i, 0, ldev) {
412 		/* Skip the primary PF in the loop */
413 		if (i == master_idx)
414 			continue;
415 
416 		if (num == seq)
417 			return i;
418 		num++;
419 	}
420 	return -ENOENT;
421 }
422 
423 /* Return the appropriate iterator filter for a device in LAG:
424  * - SD shared FDB active: iterate only the device's SD group
425  * - SD group exists but shared FDB not active: iterate all devices
426  * - No SD: iterate ports only
427  */
428 static u32 mlx5_lag_get_filter(struct mlx5_lag *ldev, struct mlx5_core_dev *dev)
429 {
430 	struct lag_func *pf = mlx5_lag_pf_by_dev(ldev, dev);
431 
432 	if (pf && pf->sd_fdb_active)
433 		return pf->group_id;
434 	if (pf && pf->group_id)
435 		return MLX5_LAG_FILTER_ALL;
436 	return MLX5_LAG_FILTER_PORTS;
437 }
438 
439 /* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
440  * sequence number in the LAG. Master is always 0, others numbered
441  * sequentially starting from 1.
442  */
443 int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
444 {
445 	struct mlx5_lag *ldev = mlx5_lag_dev(dev);
446 	int master_idx, i, num = 1;
447 	struct lag_func *pf;
448 	u32 filter;
449 
450 	if (!ldev)
451 		return -ENOENT;
452 
453 	filter = mlx5_lag_get_filter(ldev, dev);
454 	master_idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, 0, filter);
455 	if (master_idx < 0)
456 		return -ENOENT;
457 
458 	pf = mlx5_lag_pf(ldev, master_idx);
459 	if (pf && pf->dev == dev)
460 		return 0;
461 
462 	mlx5_lag_for_each(i, 0, ldev, filter) {
463 		if (i == master_idx)
464 			continue;
465 		pf = mlx5_lag_pf(ldev, i);
466 		if (pf->dev == dev)
467 			return num;
468 		num++;
469 	}
470 	return -ENOENT;
471 }
472 EXPORT_SYMBOL(mlx5_lag_get_dev_seq);
473 
474 /* seq 0 = master, then all remaining devices */
475 static int mlx5_lag_get_dev_index_by_seq_all(struct mlx5_lag *ldev, int seq)
476 {
477 	int master_idx, i, num = 0;
478 
479 	master_idx = mlx5_lag_get_master_idx(ldev);
480 
481 	if (master_idx >= 0) {
482 		if (seq == 0)
483 			return master_idx;
484 		num++;
485 	}
486 
487 	mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
488 		if (i == master_idx)
489 			continue;
490 		if (num == seq)
491 			return i;
492 		num++;
493 	}
494 	return -ENOENT;
495 }
496 
497 /* From group POV, port-marked entry is the lag master */
498 static int mlx5_lag_get_dev_index_by_seq_group(struct mlx5_lag *ldev, int seq,
499 					       u32 group_id)
500 {
501 	int i, num = 0;
502 
503 	mlx5_lag_for_each(i, 0, ldev, group_id) {
504 		if (xa_get_mark(&ldev->pfs, i, MLX5_LAG_XA_MARK_PORT)) {
505 			if (seq == 0)
506 				return i;
507 			num++;
508 			break;
509 		}
510 	}
511 
512 	mlx5_lag_for_each(i, 0, ldev, group_id) {
513 		if (xa_get_mark(&ldev->pfs, i, MLX5_LAG_XA_MARK_PORT))
514 			continue;
515 		if (num == seq)
516 			return i;
517 		num++;
518 	}
519 	return -ENOENT;
520 }
521 
522 int mlx5_lag_get_dev_index_by_seq_filter(struct mlx5_lag *ldev, int seq,
523 					 u32 filter)
524 {
525 	if (!ldev)
526 		return -ENOENT;
527 
528 	if (!filter || filter == MLX5_LAG_FILTER_PORTS)
529 		return mlx5_lag_get_dev_index_by_seq(ldev, seq);
530 
531 	if (filter == MLX5_LAG_FILTER_ALL)
532 		return mlx5_lag_get_dev_index_by_seq_all(ldev, seq);
533 
534 	return mlx5_lag_get_dev_index_by_seq_group(ldev, seq, filter);
535 }
536 
/* Devcom event codes handled by mlx5_lag_devcom_event() to maintain the
 * LAG master mark.
 */
#define LAG_DEVCOM_PAIR		(0)
#define LAG_DEVCOM_UNPAIR	(1)
540 
541 static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
542 {
543 	int lowest_dev_idx = INT_MAX;
544 	struct lag_func *pf;
545 	int master_xa_idx = -1;
546 	int dev_idx;
547 	int i;
548 
549 	mlx5_ldev_for_each(i, 0, ldev) {
550 		pf = mlx5_lag_pf(ldev, i);
551 		dev_idx = mlx5_get_dev_index(pf->dev);
552 		if (dev_idx < lowest_dev_idx) {
553 			lowest_dev_idx = dev_idx;
554 			master_xa_idx = i;
555 		}
556 	}
557 
558 	if (master_xa_idx >= 0)
559 		xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
560 }
561 
562 static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
563 {
564 	unsigned long idx = 0;
565 	void *entry;
566 
567 	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
568 	if (!entry)
569 		return;
570 
571 	xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
572 }
573 
574 /* Devcom event handler to manage LAG master marking */
575 static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
576 {
577 	struct mlx5_core_dev *dev = my_data;
578 	struct mlx5_lag *ldev;
579 	int idx;
580 
581 	ldev = mlx5_lag_dev(dev);
582 	if (!ldev)
583 		return 0;
584 
585 	mutex_lock(&ldev->lock);
586 	switch (event) {
587 	case LAG_DEVCOM_PAIR:
588 		/* No need to mark more than once */
589 		idx = mlx5_lag_get_master_idx(ldev);
590 		if (idx >= 0)
591 			break;
592 		/* Check if all LAG ports are now registered */
593 		if (mlx5_lag_num_devs(ldev) == ldev->ports)
594 			mlx5_lag_mark_master(ldev);
595 		break;
596 
597 	case LAG_DEVCOM_UNPAIR:
598 		/* Clear master mark when a device is removed */
599 		mlx5_lag_clear_master(ldev);
600 		break;
601 	}
602 	mutex_unlock(&ldev->lock);
603 	return 0;
604 }
605 
606 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
607 {
608 	int i, num = 0;
609 
610 	if (!ldev)
611 		return 0;
612 
613 	mlx5_ldev_for_each(i, 0, ldev) {
614 		(void)i;
615 		num++;
616 	}
617 	return num;
618 }
619 
620 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
621 {
622 	struct lag_func *pf;
623 	int i, num = 0;
624 
625 	if (!ldev)
626 		return 0;
627 
628 	mlx5_ldev_for_each(i, 0, ldev) {
629 		pf = mlx5_lag_pf(ldev, i);
630 		if (pf->netdev)
631 			num++;
632 	}
633 	return num;
634 }
635 
636 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
637 {
638 	return ldev->mode == MLX5_LAG_MODE_ROCE;
639 }
640 
641 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
642 {
643 	return ldev->mode == MLX5_LAG_MODE_SRIOV;
644 }
645 
646 static bool __mlx5_lag_is_sd_active(struct mlx5_lag *ldev,
647 				    struct mlx5_core_dev *dev)
648 {
649 	struct lag_func *pf = mlx5_lag_pf_by_dev(ldev, dev);
650 
651 	return pf && pf->sd_fdb_active;
652 }
653 
654 /* Create a mapping between steering slots and active ports.
655  * As we have ldev->buckets slots per port first assume the native
656  * mapping should be used.
657  * If there are ports that are disabled fill the relevant slots
658  * with mapping that points to active ports.
659  */
660 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
661 					   struct mlx5_lag *ldev,
662 					   u8 buckets,
663 					   u8 *ports)
664 {
665 	int disabled[MLX5_MAX_PORTS] = {};
666 	int enabled[MLX5_MAX_PORTS] = {};
667 	int disabled_ports_num = 0;
668 	int enabled_ports_num = 0;
669 	int idx;
670 	u32 rand;
671 	int i;
672 	int j;
673 
674 	mlx5_ldev_for_each(i, 0, ldev) {
675 		if (tracker->netdev_state[i].tx_enabled &&
676 		    tracker->netdev_state[i].link_up)
677 			enabled[enabled_ports_num++] = i;
678 		else
679 			disabled[disabled_ports_num++] = i;
680 	}
681 
682 	/* Use native mapping by default where each port's buckets
683 	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
684 	 * ports[] values are 1-indexed device indices for FW.
685 	 */
686 	mlx5_ldev_for_each(i, 0, ldev) {
687 		for (j = 0; j < buckets; j++) {
688 			idx = i * buckets + j;
689 			ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
690 		}
691 	}
692 
693 	/* If all ports are disabled/enabled keep native mapping */
694 	if (enabled_ports_num == ldev->ports ||
695 	    disabled_ports_num == ldev->ports)
696 		return;
697 
698 	/* Go over the disabled ports and for each assign a random active port */
699 	for (i = 0; i < disabled_ports_num; i++) {
700 		for (j = 0; j < buckets; j++) {
701 			int rand_xa_idx;
702 
703 			get_random_bytes(&rand, 4);
704 			rand_xa_idx = enabled[rand % enabled_ports_num];
705 			ports[disabled[i] * buckets + j] =
706 				mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
707 		}
708 	}
709 }
710 
711 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
712 {
713 	struct lag_func *pf;
714 	int i;
715 
716 	mlx5_ldev_for_each(i, 0, ldev) {
717 		pf = mlx5_lag_pf(ldev, i);
718 		if (pf->has_drop)
719 			return true;
720 	}
721 	return false;
722 }
723 
724 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
725 {
726 	struct lag_func *pf;
727 	int i;
728 
729 	mlx5_ldev_for_each(i, 0, ldev) {
730 		pf = mlx5_lag_pf(ldev, i);
731 		if (!pf->has_drop)
732 			continue;
733 
734 		mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch,
735 							     MLX5_VPORT_UPLINK);
736 		pf->has_drop = false;
737 	}
738 }
739 
740 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
741 				     struct lag_tracker *tracker)
742 {
743 	u8 disabled_ports[MLX5_MAX_PORTS] = {};
744 	struct mlx5_core_dev *dev;
745 	struct lag_func *pf;
746 	int disabled_index;
747 	int num_disabled;
748 	int err;
749 	int i;
750 
751 	/* First delete the current drop rule so there won't be any dropped
752 	 * packets
753 	 */
754 	mlx5_lag_drop_rule_cleanup(ldev);
755 
756 	if (!ldev->tracker.has_inactive)
757 		return;
758 
759 	mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
760 
761 	for (i = 0; i < num_disabled; i++) {
762 		disabled_index = disabled_ports[i];
763 		pf = mlx5_lag_pf(ldev, disabled_index);
764 		dev = pf->dev;
765 		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
766 								  MLX5_VPORT_UPLINK);
767 		if (!err)
768 			pf->has_drop = true;
769 		else
770 			mlx5_core_err(dev,
771 				      "Failed to create lag drop rule, error: %d", err);
772 	}
773 }
774 
775 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
776 {
777 	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
778 	void *lag_ctx;
779 
780 	lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
781 
782 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
783 	MLX5_SET(modify_lag_in, in, field_select, 0x2);
784 
785 	MLX5_SET(lagc, lag_ctx, active_port, ports);
786 
787 	return mlx5_cmd_exec_in(dev, modify_lag, in);
788 }
789 
790 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
791 {
792 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
793 	struct mlx5_core_dev *dev0;
794 	u8 active_ports;
795 	int ret;
796 
797 	if (idx < 0)
798 		return -EINVAL;
799 
800 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
801 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
802 		ret = mlx5_lag_port_sel_modify(ldev, ports);
803 		if (ret ||
804 		    !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
805 			return ret;
806 
807 		active_ports = lag_active_port_bits(ldev);
808 
809 		return mlx5_cmd_modify_active_port(dev0, active_ports);
810 	}
811 	return mlx5_cmd_modify_lag(dev0, ldev, ports);
812 }
813 
814 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
815 {
816 	struct net_device *ndev = NULL;
817 	struct lag_func *pf;
818 	struct mlx5_lag *ldev;
819 	unsigned long flags;
820 	int i, last_idx;
821 
822 	spin_lock_irqsave(&lag_lock, flags);
823 	ldev = mlx5_lag_dev(dev);
824 
825 	if (!ldev)
826 		goto unlock;
827 
828 	mlx5_ldev_for_each(i, 0, ldev) {
829 		pf = mlx5_lag_pf(ldev, i);
830 		if (ldev->tracker.netdev_state[i].tx_enabled)
831 			ndev = pf->netdev;
832 	}
833 	if (!ndev) {
834 		last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
835 		if (last_idx < 0)
836 			goto unlock;
837 		pf = mlx5_lag_pf(ldev, last_idx);
838 		ndev = pf->netdev;
839 	}
840 
841 	dev_hold(ndev);
842 
843 unlock:
844 	spin_unlock_irqrestore(&lag_lock, flags);
845 
846 	return ndev;
847 }
848 
849 void mlx5_modify_lag(struct mlx5_lag *ldev,
850 		     struct lag_tracker *tracker)
851 {
852 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
853 	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
854 	struct mlx5_core_dev *dev0;
855 	int idx;
856 	int err;
857 	int i;
858 	int j;
859 
860 	if (first_idx < 0)
861 		return;
862 
863 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
864 	mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
865 
866 	mlx5_ldev_for_each(i, 0, ldev) {
867 		for (j = 0; j < ldev->buckets; j++) {
868 			idx = i * ldev->buckets + j;
869 			if (ports[idx] == ldev->v2p_map[idx])
870 				continue;
871 			err = _mlx5_modify_lag(ldev, ports);
872 			if (err) {
873 				mlx5_core_err(dev0,
874 					      "Failed to modify LAG (%d)\n",
875 					      err);
876 				return;
877 			}
878 			memcpy(ldev->v2p_map, ports, sizeof(ports));
879 
880 			mlx5_lag_print_mapping(dev0, ldev, tracker,
881 					       ldev->mode_flags);
882 			break;
883 		}
884 	}
885 
886 	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
887 		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
888 
889 		if(!(ldev->mode == MLX5_LAG_MODE_ROCE))
890 			mlx5_lag_drop_rule_setup(ldev, tracker);
891 		/** Only sriov and roce lag should have tracker->tx_type set so
892 		 *  no need to check the mode
893 		 */
894 		blocking_notifier_call_chain(&dev0->priv.lag_nh,
895 					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
896 					     ndev);
897 		dev_put(ndev);
898 	}
899 }
900 
901 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
902 				      enum mlx5_lag_mode mode,
903 				      unsigned long *flags)
904 {
905 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
906 	struct mlx5_core_dev *dev0;
907 
908 	if (first_idx < 0)
909 		return -EINVAL;
910 
911 	if (mode == MLX5_LAG_MODE_MPESW ||
912 	    mode == MLX5_LAG_MODE_MULTIPATH)
913 		return 0;
914 
915 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
916 
917 	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
918 		if (ldev->ports > 2)
919 			return -EINVAL;
920 		return 0;
921 	}
922 
923 	if (ldev->ports > 2)
924 		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
925 
926 	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
927 
928 	return 0;
929 }
930 
931 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
932 			      struct lag_tracker *tracker, bool shared_fdb,
933 			      unsigned long *flags)
934 {
935 	*flags = 0;
936 	if (shared_fdb) {
937 		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
938 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
939 	}
940 
941 	if (mode == MLX5_LAG_MODE_MPESW)
942 		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
943 
944 	return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
945 }
946 
947 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
948 {
949 	int port_sel_mode = get_port_sel_mode(mode, flags);
950 
951 	switch (port_sel_mode) {
952 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
953 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
954 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
955 	default: return "invalid";
956 	}
957 }
958 
959 static int mlx5_create_lag(struct mlx5_lag *ldev,
960 			   struct lag_tracker *tracker,
961 			   enum mlx5_lag_mode mode,
962 			   unsigned long flags)
963 {
964 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
965 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
966 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
967 	struct mlx5_core_dev *dev0;
968 	int err;
969 
970 	if (first_idx < 0)
971 		return -EINVAL;
972 
973 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
974 	if (tracker)
975 		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
976 	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
977 		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
978 
979 	err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
980 	if (err) {
981 		mlx5_core_err(dev0,
982 			      "Failed to create LAG (%d)\n",
983 			      err);
984 		return err;
985 	}
986 
987 	if (shared_fdb) {
988 		err = mlx5_lag_create_single_fdb(ldev);
989 		if (err)
990 			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
991 		else
992 			mlx5_core_info(dev0, "Operation mode is single FDB\n");
993 	}
994 
995 	if (err) {
996 		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
997 		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
998 			mlx5_core_err(dev0,
999 				      "Failed to deactivate RoCE LAG; driver restart required\n");
1000 	}
1001 	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
1002 
1003 	return err;
1004 }
1005 
1006 int mlx5_activate_lag(struct mlx5_lag *ldev,
1007 		      struct lag_tracker *tracker,
1008 		      enum mlx5_lag_mode mode,
1009 		      bool shared_fdb)
1010 {
1011 	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
1012 	struct mlx5_core_dev *dev0;
1013 	unsigned long flags = 0;
1014 	int master_idx;
1015 	int err;
1016 
1017 	master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1018 	if (master_idx < 0)
1019 		return -EINVAL;
1020 
1021 	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
1022 	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
1023 	if (err)
1024 		return err;
1025 
1026 	if (mode != MLX5_LAG_MODE_MPESW) {
1027 		mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
1028 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
1029 			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
1030 						       ldev->v2p_map);
1031 			if (err) {
1032 				mlx5_core_err(dev0,
1033 					      "Failed to create LAG port selection(%d)\n",
1034 					      err);
1035 				return err;
1036 			}
1037 		}
1038 	}
1039 
1040 	err = mlx5_create_lag(ldev, tracker, mode, flags);
1041 	if (err) {
1042 		if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
1043 			mlx5_lag_port_sel_destroy(ldev);
1044 		if (roce_lag)
1045 			mlx5_core_err(dev0,
1046 				      "Failed to activate RoCE LAG\n");
1047 		else
1048 			mlx5_core_err(dev0,
1049 				      "Failed to activate VF LAG\n"
1050 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
1051 		return err;
1052 	}
1053 
1054 	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
1055 	    !roce_lag)
1056 		mlx5_lag_drop_rule_setup(ldev, tracker);
1057 
1058 	ldev->mode = mode;
1059 	ldev->mode_flags = flags;
1060 	return 0;
1061 }
1062 
1063 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
1064 {
1065 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1066 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
1067 	bool roce_lag = __mlx5_lag_is_roce(ldev);
1068 	unsigned long flags = ldev->mode_flags;
1069 	struct mlx5_core_dev *dev0;
1070 	int err;
1071 
1072 	if (master_idx < 0)
1073 		return -EINVAL;
1074 
1075 	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
1076 	ldev->mode = MLX5_LAG_MODE_NONE;
1077 	ldev->mode_flags = 0;
1078 	mlx5_lag_mp_reset(ldev);
1079 
1080 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
1081 		mlx5_lag_destroy_single_fdb(ldev);
1082 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
1083 	}
1084 
1085 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
1086 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
1087 	if (err) {
1088 		if (roce_lag) {
1089 			mlx5_core_err(dev0,
1090 				      "Failed to deactivate RoCE LAG; driver restart required\n");
1091 		} else {
1092 			mlx5_core_err(dev0,
1093 				      "Failed to deactivate VF LAG; driver restart required\n"
1094 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
1095 		}
1096 		return err;
1097 	}
1098 
1099 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
1100 		mlx5_lag_port_sel_destroy(ldev);
1101 		ldev->buckets = 1;
1102 	}
1103 	if (mlx5_lag_has_drop_rule(ldev))
1104 		mlx5_lag_drop_rule_cleanup(ldev);
1105 
1106 	return 0;
1107 }
1108 
1109 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
1110 {
1111 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1112 #ifdef CONFIG_MLX5_ESWITCH
1113 	struct mlx5_core_dev *dev;
1114 	u8 mode;
1115 #endif
1116 	struct lag_func *pf;
1117 	bool roce_support;
1118 	int i;
1119 
1120 	if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
1121 		return false;
1122 
1123 #ifdef CONFIG_MLX5_ESWITCH
1124 	mlx5_ldev_for_each(i, 0, ldev) {
1125 		pf = mlx5_lag_pf(ldev, i);
1126 		dev = pf->dev;
1127 		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
1128 			return false;
1129 	}
1130 
1131 	pf = mlx5_lag_pf(ldev, master_idx);
1132 	dev = pf->dev;
1133 	mode = mlx5_eswitch_mode(dev);
1134 	mlx5_ldev_for_each(i, 0, ldev) {
1135 		pf = mlx5_lag_pf(ldev, i);
1136 		if (mlx5_eswitch_mode(pf->dev) != mode)
1137 			return false;
1138 	}
1139 
1140 #else
1141 	mlx5_ldev_for_each(i, 0, ldev) {
1142 		pf = mlx5_lag_pf(ldev, i);
1143 		if (mlx5_sriov_is_enabled(pf->dev))
1144 			return false;
1145 	}
1146 #endif
1147 	pf = mlx5_lag_pf(ldev, master_idx);
1148 	roce_support = mlx5_get_roce_state(pf->dev);
1149 	mlx5_ldev_for_each(i, 0, ldev) {
1150 		if (i == master_idx)
1151 			continue;
1152 		pf = mlx5_lag_pf(ldev, i);
1153 		if (mlx5_get_roce_state(pf->dev) != roce_support)
1154 			return false;
1155 	}
1156 
1157 	return true;
1158 }
1159 
1160 static void mlx5_lag_assert_locked_transition(struct mlx5_lag *ldev, u32 filter)
1161 {
1162 	struct mlx5_devcom_comp_dev *devcom = NULL;
1163 	struct lag_func *pf;
1164 	int i;
1165 
1166 	lockdep_assert_held(&ldev->lock);
1167 
1168 	i = mlx5_get_next_lag_func(ldev, 0, filter);
1169 	if (i < MLX5_MAX_PORTS) {
1170 		pf = mlx5_lag_pf(ldev, i);
1171 		if (filter == MLX5_LAG_FILTER_PORTS ||
1172 		    filter == MLX5_LAG_FILTER_ALL)
1173 			devcom = pf->dev->priv.hca_devcom_comp;
1174 		else
1175 			devcom = mlx5_sd_get_devcom(pf->dev);
1176 	}
1177 	mlx5_devcom_comp_assert_locked(devcom);
1178 }
1179 
1180 static void mlx5_lag_drop_lock_for_reps(struct mlx5_lag *ldev, u32 filter)
1181 {
1182 	mlx5_lag_assert_locked_transition(ldev, filter);
1183 
1184 	/* Keep PF membership stable while ldev->lock is dropped. Device add
1185 	 * and remove paths observe mode_changes_in_progress and retry.
1186 	 */
1187 	ldev->mode_changes_in_progress++;
1188 	mutex_unlock(&ldev->lock);
1189 }
1190 
1191 static void mlx5_lag_retake_lock_after_reps(struct mlx5_lag *ldev)
1192 {
1193 	mutex_lock(&ldev->lock);
1194 	ldev->mode_changes_in_progress--;
1195 }
1196 
1197 void mlx5_lag_rescan_dev_locked(struct mlx5_lag *ldev,
1198 				struct mlx5_core_dev *dev,
1199 				bool enable)
1200 {
1201 	if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1202 		return;
1203 
1204 	if (enable)
1205 		dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1206 	else
1207 		dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1208 
1209 	/* Auxiliary bus probe/remove can register or unregister representor
1210 	 * callbacks and take reps_lock. Drop ldev->lock so the only ordering
1211 	 * remains reps_lock -> ldev->lock from representor callbacks.
1212 	 */
1213 	mlx5_lag_drop_lock_for_reps(ldev, mlx5_lag_get_filter(ldev, dev));
1214 	mlx5_rescan_drivers_locked(dev);
1215 	mlx5_lag_retake_lock_after_reps(ldev);
1216 }
1217 
1218 static void mlx5_lag_rescan_devices_locked_filter(struct mlx5_lag *ldev,
1219 						  bool enable, u32 filter)
1220 {
1221 	struct mlx5_core_dev *devs[MLX5_MAX_PORTS];
1222 	struct lag_func *pf;
1223 	int num_devs = 0;
1224 	int i;
1225 
1226 	mlx5_lag_assert_locked_transition(ldev, filter);
1227 
1228 	mlx5_lag_for_each(i, 0, ldev, filter) {
1229 		pf = mlx5_lag_pf(ldev, i);
1230 		if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1231 			continue;
1232 
1233 		if (enable)
1234 			pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1235 		else
1236 			pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1237 		devs[num_devs++] = pf->dev;
1238 	}
1239 
1240 	mlx5_lag_drop_lock_for_reps(ldev, filter);
1241 	for (i = 0; i < num_devs; i++)
1242 		mlx5_rescan_drivers_locked(devs[i]);
1243 	mlx5_lag_retake_lock_after_reps(ldev);
1244 }
1245 
1246 void mlx5_lag_add_devices_filter(struct mlx5_lag *ldev, u32 filter)
1247 {
1248 	mlx5_lag_rescan_devices_locked_filter(ldev, true, filter);
1249 }
1250 
1251 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
1252 {
1253 	mlx5_lag_add_devices_filter(ldev, MLX5_LAG_FILTER_PORTS);
1254 }
1255 
1256 void mlx5_lag_remove_devices_filter(struct mlx5_lag *ldev, u32 filter)
1257 {
1258 	mlx5_lag_rescan_devices_locked_filter(ldev, false, filter);
1259 }
1260 
1261 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
1262 {
1263 	mlx5_lag_remove_devices_filter(ldev, MLX5_LAG_FILTER_PORTS);
1264 }
1265 
1266 static int mlx5_lag_reload_ib_reps_unlocked(struct mlx5_lag *ldev, u32 flags,
1267 					    u32 filter, bool cont_on_fail)
1268 {
1269 	struct lag_func *pf;
1270 	int ret;
1271 	int i;
1272 
1273 	mlx5_lag_for_each(i, 0, ldev, filter) {
1274 		pf = mlx5_lag_pf(ldev, i);
1275 		if (!(pf->dev->priv.flags & flags)) {
1276 			struct mlx5_eswitch *esw;
1277 
1278 			esw = pf->dev->priv.eswitch;
1279 			mlx5_esw_reps_block(esw);
1280 			ret = mlx5_eswitch_reload_ib_reps(esw);
1281 			mlx5_esw_reps_unblock(esw);
1282 			if (ret && !cont_on_fail)
1283 				return ret;
1284 		}
1285 	}
1286 
1287 	return 0;
1288 }
1289 
1290 static int mlx5_lag_reload_ib_reps(struct mlx5_lag *ldev, u32 flags,
1291 				   u32 filter, bool cont_on_fail)
1292 {
1293 	int ret;
1294 
1295 	/* The HCA devcom component lock serializes LAG mode transitions while
1296 	 * ldev->lock is dropped here. Dropping ldev->lock is required because
1297 	 * the reload takes the per-E-Switch reps_lock, and representor
1298 	 * load/unload callbacks can re-enter LAG netdev add/remove and take
1299 	 * ldev->lock. Keep the ordering reps_lock -> ldev->lock.
1300 	 */
1301 	mlx5_lag_drop_lock_for_reps(ldev, filter);
1302 	ret = mlx5_lag_reload_ib_reps_unlocked(ldev, flags, filter,
1303 					       cont_on_fail);
1304 	mlx5_lag_retake_lock_after_reps(ldev);
1305 
1306 	return ret;
1307 }
1308 
1309 int mlx5_lag_reload_ib_reps_from_locked(struct mlx5_lag *ldev, u32 flags,
1310 					u32 filter, bool cont_on_fail)
1311 {
1312 	return mlx5_lag_reload_ib_reps(ldev, flags, filter, cont_on_fail);
1313 }
1314 
1315 static void mlx5_lag_unload_reps_unlocked(struct mlx5_lag *ldev, u32 filter)
1316 {
1317 	struct lag_func *pf;
1318 	int i;
1319 
1320 	mlx5_lag_for_each(i, 0, ldev, filter) {
1321 		struct mlx5_eswitch *esw;
1322 
1323 		pf = mlx5_lag_pf(ldev, i);
1324 		esw = pf->dev->priv.eswitch;
1325 		mlx5_esw_reps_block(esw);
1326 		mlx5_eswitch_unload_reps(esw);
1327 		mlx5_esw_reps_unblock(esw);
1328 	}
1329 }
1330 
1331 void mlx5_lag_unload_reps_from_locked(struct mlx5_lag *ldev, u32 filter)
1332 {
1333 	/* Same lock dance as mlx5_lag_reload_ib_reps: drop ldev->lock around
1334 	 * the per-eswitch reps_lock to keep the reps_lock -> ldev->lock order.
1335 	 */
1336 	mlx5_lag_drop_lock_for_reps(ldev, filter);
1337 	mlx5_lag_unload_reps_unlocked(ldev, filter);
1338 	mlx5_lag_retake_lock_after_reps(ldev);
1339 }
1340 
1341 void mlx5_disable_lag(struct mlx5_lag *ldev)
1342 {
1343 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1344 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1345 	struct mlx5_core_dev *dev0;
1346 	bool roce_lag;
1347 	int err;
1348 	int i;
1349 
1350 	if (idx < 0)
1351 		return;
1352 
1353 	if (shared_fdb) {
1354 		mlx5_lag_shared_fdb_destroy(ldev, 0);
1355 		return;
1356 	}
1357 
1358 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
1359 	roce_lag = __mlx5_lag_is_roce(ldev);
1360 
1361 	if (roce_lag) {
1362 		mlx5_lag_rescan_dev_locked(ldev, dev0, false);
1363 		mlx5_ldev_for_each(i, 0, ldev) {
1364 			if (i == idx)
1365 				continue;
1366 			mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
1367 		}
1368 	}
1369 
1370 	err = mlx5_deactivate_lag(ldev);
1371 	if (err)
1372 		return;
1373 
1374 	if (roce_lag)
1375 		mlx5_lag_add_devices(ldev);
1376 }
1377 
1378 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
1379 {
1380 	bool roce_lag = true;
1381 	struct lag_func *pf;
1382 	int i;
1383 
1384 	mlx5_ldev_for_each(i, 0, ldev) {
1385 		pf = mlx5_lag_pf(ldev, i);
1386 		roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev);
1387 	}
1388 
1389 #ifdef CONFIG_MLX5_ESWITCH
1390 	mlx5_ldev_for_each(i, 0, ldev) {
1391 		pf = mlx5_lag_pf(ldev, i);
1392 		roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev);
1393 	}
1394 #endif
1395 
1396 	return roce_lag;
1397 }
1398 
1399 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
1400 {
1401 	return do_bond && __mlx5_lag_is_active(ldev) &&
1402 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1403 }
1404 
1405 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
1406 {
1407 	return !do_bond && __mlx5_lag_is_active(ldev) &&
1408 	       ldev->mode != MLX5_LAG_MODE_MPESW;
1409 }
1410 
1411 #ifdef CONFIG_MLX5_ESWITCH
1412 static int
1413 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
1414 			   int (*get_speed)(struct mlx5_core_dev *, u32 *))
1415 {
1416 	struct mlx5_core_dev *pf_mdev;
1417 	struct lag_func *pf;
1418 	int pf_idx;
1419 	u32 speed;
1420 	int ret;
1421 
1422 	*sum_speed = 0;
1423 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1424 		pf = mlx5_lag_pf(ldev, pf_idx);
1425 		if (!pf)
1426 			continue;
1427 		pf_mdev = pf->dev;
1428 		if (!pf_mdev)
1429 			continue;
1430 
1431 		ret = get_speed(pf_mdev, &speed);
1432 		if (ret) {
1433 			mlx5_core_dbg(pf_mdev,
1434 				      "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
1435 				      get_speed, dev_name(pf_mdev->device),
1436 				      ret);
1437 			return ret;
1438 		}
1439 
1440 		*sum_speed += speed;
1441 	}
1442 
1443 	return 0;
1444 }
1445 
1446 static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
1447 {
1448 	return mlx5_lag_sum_devices_speed(ldev, max_speed,
1449 					  mlx5_port_max_linkspeed);
1450 }
1451 
1452 static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
1453 					   u32 *oper_speed)
1454 {
1455 	return mlx5_lag_sum_devices_speed(ldev, oper_speed,
1456 					  mlx5_port_oper_linkspeed);
1457 }
1458 
1459 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
1460 						u32 speed)
1461 {
1462 	u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
1463 	struct mlx5_eswitch *esw = mdev->priv.eswitch;
1464 	struct mlx5_vport *vport;
1465 	unsigned long i;
1466 	int ret;
1467 
1468 	if (!esw)
1469 		return;
1470 
1471 	if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
1472 		return;
1473 
1474 	mlx5_esw_for_each_vport(esw, i, vport) {
1475 		if (!vport)
1476 			continue;
1477 
1478 		if (vport->vport == MLX5_VPORT_UPLINK)
1479 			continue;
1480 
1481 		vport->agg_max_tx_speed = speed;
1482 
1483 		if (!vport->enabled)
1484 			continue;
1485 
1486 		ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
1487 						     vport->vport, true, speed);
1488 		if (ret)
1489 			mlx5_core_dbg(mdev,
1490 				      "Failed to set vport %d speed %d, err=%d\n",
1491 				      vport->vport, speed, ret);
1492 	}
1493 }
1494 
1495 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
1496 {
1497 	struct mlx5_core_dev *mdev;
1498 	struct lag_func *pf;
1499 	u32 speed;
1500 	int pf_idx;
1501 
1502 	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
1503 		if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
1504 			return;
1505 	} else {
1506 		speed = ldev->tracker.bond_speed_mbps;
1507 		if (speed == SPEED_UNKNOWN)
1508 			return;
1509 	}
1510 
1511 	/* If speed is not set, use the sum of max speeds of all PFs */
1512 	if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
1513 		return;
1514 
1515 	speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1516 
1517 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1518 		pf = mlx5_lag_pf(ldev, pf_idx);
1519 		if (!pf)
1520 			continue;
1521 		mdev = pf->dev;
1522 		if (!mdev)
1523 			continue;
1524 
1525 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1526 	}
1527 }
1528 
1529 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
1530 {
1531 	struct mlx5_core_dev *mdev;
1532 	struct lag_func *pf;
1533 	u32 speed;
1534 	int pf_idx;
1535 	int ret;
1536 
1537 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1538 		pf = mlx5_lag_pf(ldev, pf_idx);
1539 		if (!pf)
1540 			continue;
1541 		mdev = pf->dev;
1542 		if (!mdev)
1543 			continue;
1544 
1545 		ret = mlx5_port_oper_linkspeed(mdev, &speed);
1546 		if (ret) {
1547 			mlx5_core_dbg(mdev,
1548 				      "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
1549 				      dev_name(mdev->device), ret);
1550 			continue;
1551 		}
1552 
1553 		speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1554 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1555 	}
1556 }
1557 #endif
1558 
1559 static void mlx5_do_bond(struct mlx5_lag *ldev)
1560 {
1561 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1562 	struct lag_tracker tracker = { };
1563 	struct mlx5_core_dev *dev0;
1564 	struct net_device *ndev;
1565 	bool do_bond, roce_lag;
1566 	int err;
1567 	int i;
1568 
1569 	if (idx < 0)
1570 		return;
1571 
1572 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
1573 	if (!mlx5_lag_is_ready(ldev)) {
1574 		do_bond = false;
1575 	} else {
1576 		/* VF LAG is in multipath mode, ignore bond change requests */
1577 		if (mlx5_lag_is_multipath(dev0))
1578 			return;
1579 
1580 		tracker = ldev->tracker;
1581 
1582 		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1583 	}
1584 
1585 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
1586 		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1587 
1588 		roce_lag = mlx5_lag_is_roce_lag(ldev);
1589 
1590 		if (shared_fdb) {
1591 			err = mlx5_lag_shared_fdb_create(ldev, &tracker,
1592 							 MLX5_LAG_MODE_SRIOV,
1593 							 0);
1594 			if (err)
1595 				return;
1596 		} else {
1597 			if (roce_lag)
1598 				mlx5_lag_remove_devices(ldev);
1599 
1600 			err = mlx5_activate_lag(ldev, &tracker,
1601 						roce_lag ? MLX5_LAG_MODE_ROCE :
1602 							   MLX5_LAG_MODE_SRIOV,
1603 						false);
1604 			if (err) {
1605 				if (roce_lag)
1606 					mlx5_lag_add_devices(ldev);
1607 				return;
1608 			}
1609 
1610 			if (roce_lag) {
1611 				struct mlx5_core_dev *dev;
1612 
1613 				mlx5_lag_rescan_dev_locked(ldev, dev0, true);
1614 				mlx5_ldev_for_each(i, 0, ldev) {
1615 					if (i == idx)
1616 						continue;
1617 					dev = mlx5_lag_pf(ldev, i)->dev;
1618 					if (mlx5_get_roce_state(dev))
1619 						mlx5_nic_vport_enable_roce(dev);
1620 				}
1621 			}
1622 		}
1623 		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1624 			ndev = mlx5_lag_active_backup_get_netdev(dev0);
1625 			/** Only sriov and roce lag should have tracker->TX_type
1626 			 *  set so no need to check the mode
1627 			 */
1628 			blocking_notifier_call_chain(&dev0->priv.lag_nh,
1629 						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1630 						     ndev);
1631 			dev_put(ndev);
1632 		}
1633 		if (!shared_fdb)
1634 			mlx5_lag_set_vports_agg_speed(ldev);
1635 	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1636 		mlx5_modify_lag(ldev, &tracker);
1637 		mlx5_lag_set_vports_agg_speed(ldev);
1638 	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1639 		mlx5_lag_reset_vports_speed(ldev);
1640 		mlx5_disable_lag(ldev);
1641 	}
1642 }
1643 
1644 /* The last mdev to unregister will destroy the workqueue before removing the
1645  * devcom component, and as all the mdevs use the same devcom component we are
1646  * guaranteed that the devcom is valid while the calling work is running.
1647  */
1648 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1649 {
1650 	struct mlx5_devcom_comp_dev *devcom = NULL;
1651 	struct lag_func *pf;
1652 	int i;
1653 
1654 	mutex_lock(&ldev->lock);
1655 	i = mlx5_get_next_lag_func(ldev, 0, MLX5_LAG_FILTER_PORTS);
1656 	if (i < MLX5_MAX_PORTS) {
1657 		pf = mlx5_lag_pf(ldev, i);
1658 		devcom = pf->dev->priv.hca_devcom_comp;
1659 	}
1660 	mutex_unlock(&ldev->lock);
1661 	return devcom;
1662 }
1663 
1664 static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
1665 				     struct mlx5_flow_table_attr *ft_attr,
1666 				     struct lag_func *pf)
1667 {
1668 #ifdef CONFIG_MLX5_ESWITCH
1669 	struct mlx5_flow_namespace *ns;
1670 	struct mlx5_flow_group *fg;
1671 	int err;
1672 
1673 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1674 	if (!ns)
1675 		return 0;
1676 
1677 	pf->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
1678 	if (IS_ERR(pf->lag_demux_ft))
1679 		return PTR_ERR(pf->lag_demux_ft);
1680 
1681 	fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
1682 					  pf->lag_demux_ft);
1683 	if (IS_ERR(fg)) {
1684 		err = PTR_ERR(fg);
1685 		mlx5_destroy_flow_table(pf->lag_demux_ft);
1686 		pf->lag_demux_ft = NULL;
1687 		return err;
1688 	}
1689 
1690 	pf->lag_demux_fg = fg;
1691 	return 0;
1692 #else
1693 	return -EOPNOTSUPP;
1694 #endif
1695 }
1696 
1697 static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
1698 				  struct mlx5_flow_table_attr *ft_attr,
1699 				  struct lag_func *pf)
1700 {
1701 	struct mlx5_flow_namespace *ns;
1702 	int err;
1703 
1704 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1705 	if (!ns)
1706 		return 0;
1707 
1708 	pf->lag_demux_fg = NULL;
1709 	ft_attr->max_fte = 1;
1710 	pf->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
1711 	if (IS_ERR(pf->lag_demux_ft)) {
1712 		err = PTR_ERR(pf->lag_demux_ft);
1713 		pf->lag_demux_ft = NULL;
1714 		return err;
1715 	}
1716 
1717 	return 0;
1718 }
1719 
1720 int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
1721 			struct mlx5_flow_table_attr *ft_attr)
1722 {
1723 	struct mlx5_lag *ldev;
1724 	struct lag_func *pf;
1725 
1726 	if (!ft_attr)
1727 		return -EINVAL;
1728 
1729 	ldev = mlx5_lag_dev(dev);
1730 	if (!ldev)
1731 		return -ENODEV;
1732 
1733 	pf = mlx5_lag_pf_by_dev(ldev, dev);
1734 	if (!pf)
1735 		return -ENODEV;
1736 
1737 	xa_init(&pf->lag_demux_rules);
1738 
1739 	if (mlx5_lag_is_sw_lag(dev))
1740 		return mlx5_lag_demux_ft_fg_init(dev, ft_attr, pf);
1741 
1742 	return mlx5_lag_demux_fw_init(dev, ft_attr, pf);
1743 }
1744 EXPORT_SYMBOL(mlx5_lag_demux_init);
1745 
1746 void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
1747 {
1748 	struct mlx5_flow_handle *rule;
1749 	struct mlx5_lag *ldev;
1750 	unsigned long vport_num;
1751 	struct lag_func *pf;
1752 
1753 	ldev = mlx5_lag_dev(dev);
1754 	if (!ldev)
1755 		return;
1756 
1757 	pf = mlx5_lag_pf_by_dev(ldev, dev);
1758 	if (!pf)
1759 		return;
1760 
1761 	xa_for_each(&pf->lag_demux_rules, vport_num, rule)
1762 		mlx5_del_flow_rules(rule);
1763 	xa_destroy(&pf->lag_demux_rules);
1764 
1765 	if (pf->lag_demux_fg)
1766 		mlx5_destroy_flow_group(pf->lag_demux_fg);
1767 	if (pf->lag_demux_ft)
1768 		mlx5_destroy_flow_table(pf->lag_demux_ft);
1769 	pf->lag_demux_fg = NULL;
1770 	pf->lag_demux_ft = NULL;
1771 }
1772 EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
1773 
1774 static struct lag_func *mlx5_lag_dev_get_master_pf(struct mlx5_lag *ldev,
1775 						   struct mlx5_core_dev *dev)
1776 {
1777 	u32 filter = mlx5_lag_get_filter(ldev, dev);
1778 	int idx;
1779 
1780 	idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1, filter);
1781 	if (idx < 0)
1782 		return NULL;
1783 
1784 	return mlx5_lag_pf(ldev, idx);
1785 }
1786 
1787 int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
1788 			    int index)
1789 {
1790 	struct mlx5_flow_handle *rule;
1791 	struct lag_func *master;
1792 	struct mlx5_lag *ldev;
1793 	int err;
1794 
1795 	ldev = mlx5_lag_dev(vport_dev);
1796 	if (!ldev)
1797 		return 0;
1798 
1799 	master = mlx5_lag_dev_get_master_pf(ldev, vport_dev);
1800 	if (!master || !master->lag_demux_fg)
1801 		return 0;
1802 
1803 	if (xa_load(&master->lag_demux_rules, index))
1804 		return 0;
1805 
1806 	rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
1807 					      vport_num, master->lag_demux_ft);
1808 	if (IS_ERR(rule)) {
1809 		err = PTR_ERR(rule);
1810 		mlx5_core_warn(vport_dev,
1811 			       "Failed to create LAG demux rule for vport %u, err %d\n",
1812 			       vport_num, err);
1813 		return err;
1814 	}
1815 
1816 	err = xa_err(xa_store(&master->lag_demux_rules, index, rule,
1817 			      GFP_KERNEL));
1818 	if (err) {
1819 		mlx5_del_flow_rules(rule);
1820 		mlx5_core_warn(vport_dev,
1821 			       "Failed to store LAG demux rule for vport %u, err %d\n",
1822 			       vport_num, err);
1823 	}
1824 
1825 	return err;
1826 }
1827 EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
1828 
1829 void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
1830 {
1831 	struct mlx5_flow_handle *rule;
1832 	struct lag_func *master_pf;
1833 	struct mlx5_lag *ldev;
1834 
1835 	ldev = mlx5_lag_dev(dev);
1836 	if (!ldev)
1837 		return;
1838 
1839 	master_pf = mlx5_lag_dev_get_master_pf(ldev, dev);
1840 	if (!master_pf || !master_pf->lag_demux_fg)
1841 		return;
1842 
1843 	rule = xa_erase(&master_pf->lag_demux_rules, index);
1844 	if (rule)
1845 		mlx5_del_flow_rules(rule);
1846 }
1847 EXPORT_SYMBOL(mlx5_lag_demux_rule_del);
1848 
1849 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1850 {
1851 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1852 }
1853 
1854 static void mlx5_do_bond_work(struct work_struct *work)
1855 {
1856 	struct delayed_work *delayed_work = to_delayed_work(work);
1857 	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1858 					     bond_work);
1859 	struct mlx5_devcom_comp_dev *devcom;
1860 	int status;
1861 
1862 	devcom = mlx5_lag_get_devcom_comp(ldev);
1863 	if (!devcom)
1864 		return;
1865 
1866 	status = mlx5_devcom_comp_trylock(devcom);
1867 	if (!status) {
1868 		mlx5_queue_bond_work(ldev, HZ);
1869 		return;
1870 	}
1871 
1872 	mutex_lock(&ldev->lock);
1873 	if (ldev->mode_changes_in_progress) {
1874 		mutex_unlock(&ldev->lock);
1875 		mlx5_devcom_comp_unlock(devcom);
1876 		mlx5_queue_bond_work(ldev, HZ);
1877 		return;
1878 	}
1879 
1880 	mlx5_do_bond(ldev);
1881 	mutex_unlock(&ldev->lock);
1882 	mlx5_devcom_comp_unlock(devcom);
1883 }
1884 
1885 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1886 					 struct lag_tracker *tracker,
1887 					 struct netdev_notifier_changeupper_info *info)
1888 {
1889 	struct net_device *upper = info->upper_dev, *ndev_tmp;
1890 	struct netdev_lag_upper_info *lag_upper_info = NULL;
1891 	bool is_bonded, is_in_lag, mode_supported;
1892 	bool has_inactive = 0;
1893 	struct lag_func *pf;
1894 	struct slave *slave;
1895 	u8 bond_status = 0;
1896 	int num_slaves = 0;
1897 	int changed = 0;
1898 	int i, idx = -1;
1899 
1900 	if (!netif_is_lag_master(upper))
1901 		return 0;
1902 
1903 	if (info->linking)
1904 		lag_upper_info = info->upper_info;
1905 
1906 	/* The event may still be of interest if the slave does not belong to
1907 	 * us, but is enslaved to a master which has one or more of our netdevs
1908 	 * as slaves (e.g., if a new slave is added to a master that bonds two
1909 	 * of our netdevs, we should unbond).
1910 	 */
1911 	rcu_read_lock();
1912 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1913 		mlx5_ldev_for_each(i, 0, ldev) {
1914 			pf = mlx5_lag_pf(ldev, i);
1915 			if (pf->netdev == ndev_tmp) {
1916 				idx++;
1917 				break;
1918 			}
1919 		}
1920 		if (i < MLX5_MAX_PORTS) {
1921 			slave = bond_slave_get_rcu(ndev_tmp);
1922 			if (slave)
1923 				has_inactive |= bond_is_slave_inactive(slave);
1924 			bond_status |= (1 << idx);
1925 		}
1926 
1927 		num_slaves++;
1928 	}
1929 	rcu_read_unlock();
1930 
1931 	/* None of this lagdev's netdevs are slaves of this master. */
1932 	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1933 		return 0;
1934 
1935 	if (lag_upper_info) {
1936 		tracker->tx_type = lag_upper_info->tx_type;
1937 		tracker->hash_type = lag_upper_info->hash_type;
1938 	}
1939 
1940 	tracker->has_inactive = has_inactive;
1941 	/* Determine bonding status:
1942 	 * A device is considered bonded if both its physical ports are slaves
1943 	 * of the same lag master, and only them.
1944 	 */
1945 	is_in_lag = num_slaves == ldev->ports &&
1946 		bond_status == GENMASK(ldev->ports - 1, 0);
1947 
1948 	/* Lag mode must be activebackup or hash. */
1949 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1950 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1951 
1952 	is_bonded = is_in_lag && mode_supported;
1953 	if (tracker->is_bonded != is_bonded) {
1954 		tracker->is_bonded = is_bonded;
1955 		changed = 1;
1956 	}
1957 
1958 	if (!is_in_lag)
1959 		return changed;
1960 
1961 	if (!mlx5_lag_is_ready(ldev))
1962 		NL_SET_ERR_MSG_MOD(info->info.extack,
1963 				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
1964 	else if (!mode_supported)
1965 		NL_SET_ERR_MSG_MOD(info->info.extack,
1966 				   "Can't activate LAG offload, TX type isn't supported");
1967 
1968 	return changed;
1969 }
1970 
1971 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1972 					      struct lag_tracker *tracker,
1973 					      struct net_device *ndev,
1974 					      struct netdev_notifier_changelowerstate_info *info)
1975 {
1976 	struct netdev_lag_lower_state_info *lag_lower_info;
1977 	int idx;
1978 
1979 	if (!netif_is_lag_port(ndev))
1980 		return 0;
1981 
1982 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1983 	if (idx < 0)
1984 		return 0;
1985 
1986 	/* This information is used to determine virtual to physical
1987 	 * port mapping.
1988 	 */
1989 	lag_lower_info = info->lower_state_info;
1990 	if (!lag_lower_info)
1991 		return 0;
1992 
1993 	tracker->netdev_state[idx] = *lag_lower_info;
1994 
1995 	return 1;
1996 }
1997 
1998 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1999 					    struct lag_tracker *tracker,
2000 					    struct net_device *ndev)
2001 {
2002 	struct net_device *ndev_tmp;
2003 	struct slave *slave;
2004 	bool has_inactive = 0;
2005 	int idx;
2006 
2007 	if (!netif_is_lag_master(ndev))
2008 		return 0;
2009 
2010 	rcu_read_lock();
2011 	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
2012 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
2013 		if (idx < 0)
2014 			continue;
2015 
2016 		slave = bond_slave_get_rcu(ndev_tmp);
2017 		if (slave)
2018 			has_inactive |= bond_is_slave_inactive(slave);
2019 	}
2020 	rcu_read_unlock();
2021 
2022 	if (tracker->has_inactive == has_inactive)
2023 		return 0;
2024 
2025 	tracker->has_inactive = has_inactive;
2026 
2027 	return 1;
2028 }
2029 
2030 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
2031 					  struct net_device *ndev)
2032 {
2033 	struct ethtool_link_ksettings lksettings;
2034 	struct net_device *bond_dev;
2035 	int err;
2036 
2037 	if (netif_is_lag_master(ndev))
2038 		bond_dev = ndev;
2039 	else
2040 		bond_dev = netdev_master_upper_dev_get(ndev);
2041 
2042 	if (!bond_dev) {
2043 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
2044 		return;
2045 	}
2046 
2047 	err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
2048 	if (err) {
2049 		netdev_dbg(bond_dev,
2050 			   "Failed to get speed for bond dev %s, err=%d\n",
2051 			   bond_dev->name, err);
2052 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
2053 		return;
2054 	}
2055 
2056 	if (lksettings.base.speed == SPEED_UNKNOWN)
2057 		tracker->bond_speed_mbps = 0;
2058 	else
2059 		tracker->bond_speed_mbps = lksettings.base.speed;
2060 }
2061 
2062 /* Returns speed in Mbps. */
2063 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
2064 {
2065 	struct mlx5_lag *ldev;
2066 	unsigned long flags;
2067 	int ret = 0;
2068 
2069 	spin_lock_irqsave(&lag_lock, flags);
2070 	ldev = mlx5_lag_dev(mdev);
2071 	if (!ldev) {
2072 		ret = -ENODEV;
2073 		goto unlock;
2074 	}
2075 
2076 	*speed = ldev->tracker.bond_speed_mbps;
2077 
2078 	if (*speed == SPEED_UNKNOWN) {
2079 		mlx5_core_dbg(mdev, "Bond speed is unknown\n");
2080 		ret = -EINVAL;
2081 	}
2082 
2083 unlock:
2084 	spin_unlock_irqrestore(&lag_lock, flags);
2085 	return ret;
2086 }
2087 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
2088 
2089 /* this handler is always registered to netdev events */
2090 static int mlx5_lag_netdev_event(struct notifier_block *this,
2091 				 unsigned long event, void *ptr)
2092 {
2093 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
2094 	struct lag_tracker tracker;
2095 	struct mlx5_lag *ldev;
2096 	int changed = 0;
2097 
2098 	if (event != NETDEV_CHANGEUPPER &&
2099 	    event != NETDEV_CHANGELOWERSTATE &&
2100 	    event != NETDEV_CHANGEINFODATA)
2101 		return NOTIFY_DONE;
2102 
2103 	ldev    = container_of(this, struct mlx5_lag, nb);
2104 
2105 	tracker = ldev->tracker;
2106 
2107 	switch (event) {
2108 	case NETDEV_CHANGEUPPER:
2109 		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
2110 		break;
2111 	case NETDEV_CHANGELOWERSTATE:
2112 		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
2113 							     ndev, ptr);
2114 		break;
2115 	case NETDEV_CHANGEINFODATA:
2116 		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
2117 		break;
2118 	}
2119 
2120 	if (changed)
2121 		mlx5_lag_update_tracker_speed(&tracker, ndev);
2122 
2123 	ldev->tracker = tracker;
2124 
2125 	if (changed)
2126 		mlx5_queue_bond_work(ldev, 0);
2127 
2128 	return NOTIFY_DONE;
2129 }
2130 
2131 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
2132 				struct mlx5_core_dev *dev,
2133 				struct net_device *netdev)
2134 {
2135 	struct lag_func *pf;
2136 	unsigned long flags;
2137 	int i;
2138 
2139 	spin_lock_irqsave(&lag_lock, flags);
2140 	/* Find pf entry by matching dev pointer */
2141 	mlx5_ldev_for_each(i, 0, ldev) {
2142 		pf = mlx5_lag_pf(ldev, i);
2143 		if (pf->dev == dev) {
2144 			pf->netdev = netdev;
2145 			ldev->tracker.netdev_state[i].link_up = 0;
2146 			ldev->tracker.netdev_state[i].tx_enabled = 0;
2147 			break;
2148 		}
2149 	}
2150 	spin_unlock_irqrestore(&lag_lock, flags);
2151 }
2152 
2153 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
2154 				    struct net_device *netdev)
2155 {
2156 	struct lag_func *pf;
2157 	unsigned long flags;
2158 	int i;
2159 
2160 	spin_lock_irqsave(&lag_lock, flags);
2161 	mlx5_ldev_for_each(i, 0, ldev) {
2162 		pf = mlx5_lag_pf(ldev, i);
2163 		if (pf->netdev == netdev) {
2164 			pf->netdev = NULL;
2165 			break;
2166 		}
2167 	}
2168 	spin_unlock_irqrestore(&lag_lock, flags);
2169 }
2170 
2171 int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
2172 		       struct mlx5_core_dev *dev,
2173 		       u32 group_id)
2174 {
2175 	struct lag_func *pf;
2176 	u32 idx;
2177 	int err;
2178 
2179 	pf = kzalloc_obj(*pf);
2180 	if (!pf)
2181 		return -ENOMEM;
2182 
2183 	err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
2184 		       GFP_KERNEL);
2185 	if (err) {
2186 		kfree(pf);
2187 		return err;
2188 	}
2189 
2190 	pf->idx = idx;
2191 	pf->dev = dev;
2192 	pf->group_id = group_id;
2193 	dev->priv.lag = ldev;
2194 
2195 	if (group_id)
2196 		return 0;
2197 
2198 	xa_set_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_PORT);
2199 
2200 	MLX5_NB_INIT(&pf->port_change_nb,
2201 		     mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
2202 	mlx5_eq_notifier_register(dev, &pf->port_change_nb);
2203 
2204 	return 0;
2205 }
2206 
2207 void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
2208 			   struct mlx5_core_dev *dev)
2209 {
2210 	struct lag_func *pf;
2211 	int i;
2212 
2213 	mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
2214 		pf = mlx5_lag_pf(ldev, i);
2215 		if (pf->dev == dev)
2216 			break;
2217 	}
2218 	if (i >= MLX5_MAX_PORTS)
2219 		return;
2220 
2221 	if (pf->port_change_nb.nb.notifier_call)
2222 		mlx5_eq_notifier_unregister(dev, &pf->port_change_nb);
2223 
2224 	pf->dev = NULL;
2225 	dev->priv.lag = NULL;
2226 	xa_erase(&ldev->pfs, pf->idx);
2227 	kfree(pf);
2228 }
2229 
2230 /* Must be called with HCA devcom component lock held */
2231 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
2232 {
2233 	struct mlx5_devcom_comp_dev *pos = NULL;
2234 	struct mlx5_lag *ldev = NULL;
2235 	struct mlx5_core_dev *tmp_dev;
2236 	int err;
2237 
2238 	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
2239 	if (tmp_dev)
2240 		ldev = mlx5_lag_dev(tmp_dev);
2241 
2242 	if (!ldev) {
2243 		ldev = mlx5_lag_dev_alloc(dev);
2244 		if (!ldev) {
2245 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
2246 			return 0;
2247 		}
2248 		err = mlx5_ldev_add_mdev(ldev, dev, 0);
2249 		if (err) {
2250 			mlx5_core_err(dev, "Failed to add mdev to lag dev\n");
2251 			mlx5_ldev_put(ldev);
2252 			return 0;
2253 		}
2254 		return 0;
2255 	}
2256 
2257 	mutex_lock(&ldev->lock);
2258 	if (ldev->mode_changes_in_progress) {
2259 		mutex_unlock(&ldev->lock);
2260 		return -EAGAIN;
2261 	}
2262 	mlx5_ldev_get(ldev);
2263 	err = mlx5_ldev_add_mdev(ldev, dev, 0);
2264 	if (err) {
2265 		mlx5_ldev_put(ldev);
2266 		mutex_unlock(&ldev->lock);
2267 		return err;
2268 	}
2269 	mutex_unlock(&ldev->lock);
2270 
2271 	return 0;
2272 }
2273 
2274 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
2275 {
2276 	mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
2277 	dev->priv.hca_devcom_comp = NULL;
2278 }
2279 
2280 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
2281 {
2282 	struct mlx5_devcom_match_attr attr = {
2283 		.flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
2284 		.net = mlx5_core_net(dev),
2285 	};
2286 	u8 len __always_unused;
2287 
2288 	mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
2289 
2290 	/* This component is use to sync adding core_dev to lag_dev and to sync
2291 	 * changes of mlx5_adev_devices between LAG layer and other layers.
2292 	 */
2293 	dev->priv.hca_devcom_comp =
2294 		mlx5_devcom_register_component(dev->priv.devc,
2295 					       MLX5_DEVCOM_HCA_PORTS,
2296 					       &attr, mlx5_lag_devcom_event,
2297 					       dev);
2298 	if (!dev->priv.hca_devcom_comp) {
2299 		mlx5_core_err(dev,
2300 			      "Failed to register devcom HCA component.");
2301 		return -EINVAL;
2302 	}
2303 
2304 	return 0;
2305 }
2306 
2307 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
2308 {
2309 	struct mlx5_lag *ldev;
2310 
2311 	ldev = mlx5_lag_dev(dev);
2312 	if (!ldev)
2313 		return;
2314 
2315 	/* mdev is being removed, might as well remove debugfs
2316 	 * as early as possible.
2317 	 */
2318 	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
2319 recheck:
2320 	mutex_lock(&ldev->lock);
2321 	if (ldev->mode_changes_in_progress) {
2322 		mutex_unlock(&ldev->lock);
2323 		msleep(100);
2324 		goto recheck;
2325 	}
2326 	mlx5_ldev_remove_mdev(ldev, dev);
2327 	mutex_unlock(&ldev->lock);
2328 	/* Send devcom event to notify peers that a device is being removed */
2329 	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2330 			       LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
2331 	mlx5_lag_unregister_hca_devcom_comp(dev);
2332 	mlx5_ldev_put(ldev);
2333 }
2334 
2335 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
2336 {
2337 	int err;
2338 
2339 	if (!mlx5_lag_is_supported(dev))
2340 		return;
2341 
2342 	if (mlx5_lag_register_hca_devcom_comp(dev))
2343 		return;
2344 
2345 recheck:
2346 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2347 	err = __mlx5_lag_dev_add_mdev(dev);
2348 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2349 
2350 	if (err) {
2351 		msleep(100);
2352 		goto recheck;
2353 	}
2354 	/* Send devcom event to notify peers that a device was added */
2355 	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2356 			       LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
2357 	mlx5_ldev_add_debugfs(dev);
2358 }
2359 
2360 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
2361 			    struct net_device *netdev)
2362 {
2363 	struct mlx5_lag *ldev;
2364 	bool lag_is_active;
2365 
2366 	ldev = mlx5_lag_dev(dev);
2367 	if (!ldev)
2368 		return;
2369 
2370 	mutex_lock(&ldev->lock);
2371 	mlx5_ldev_remove_netdev(ldev, netdev);
2372 	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2373 
2374 	lag_is_active = __mlx5_lag_is_active(ldev);
2375 	mutex_unlock(&ldev->lock);
2376 
2377 	if (lag_is_active)
2378 		mlx5_queue_bond_work(ldev, 0);
2379 }
2380 
2381 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
2382 			 struct net_device *netdev)
2383 {
2384 	struct mlx5_lag *ldev;
2385 	int num = 0;
2386 
2387 	ldev = mlx5_lag_dev(dev);
2388 	if (!ldev)
2389 		return;
2390 
2391 	mutex_lock(&ldev->lock);
2392 	mlx5_ldev_add_netdev(ldev, dev, netdev);
2393 	num = mlx5_lag_num_netdevs(ldev);
2394 	if (num >= ldev->ports)
2395 		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2396 	mutex_unlock(&ldev->lock);
2397 	mlx5_queue_bond_work(ldev, 0);
2398 }
2399 
2400 int mlx5_get_pre_lag_func(struct mlx5_lag *ldev, int start_idx, int end_idx,
2401 			  u32 filter)
2402 {
2403 	struct lag_func *pf;
2404 	int i;
2405 
2406 	for (i = start_idx; i >= end_idx; i--) {
2407 		pf = xa_load(&ldev->pfs, i);
2408 		if (!pf || !pf->dev)
2409 			continue;
2410 		if (filter == MLX5_LAG_FILTER_PORTS) {
2411 			if (xa_get_mark(&ldev->pfs, i, MLX5_LAG_XA_MARK_PORT))
2412 				return i;
2413 		} else if (filter == MLX5_LAG_FILTER_ALL ||
2414 			   filter == pf->group_id) {
2415 			return i;
2416 		}
2417 	}
2418 	return -1;
2419 }
2420 
2421 int mlx5_get_next_lag_func(struct mlx5_lag *ldev, int start_idx, u32 filter)
2422 {
2423 	struct lag_func *pf;
2424 	unsigned long idx;
2425 
2426 	if (filter == MLX5_LAG_FILTER_PORTS) {
2427 		xa_for_each_marked_start(&ldev->pfs, idx, pf,
2428 					 MLX5_LAG_XA_MARK_PORT, start_idx)
2429 			if (pf->dev)
2430 				return idx;
2431 		return MLX5_MAX_PORTS;
2432 	}
2433 
2434 	xa_for_each_start(&ldev->pfs, idx, pf, start_idx) {
2435 		if (!pf->dev)
2436 			continue;
2437 		if (filter == MLX5_LAG_FILTER_ALL ||
2438 		    filter == pf->group_id)
2439 			return idx;
2440 	}
2441 	return MLX5_MAX_PORTS;
2442 }
2443 
2444 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
2445 {
2446 	struct mlx5_lag *ldev;
2447 	unsigned long flags;
2448 	bool res;
2449 
2450 	spin_lock_irqsave(&lag_lock, flags);
2451 	ldev = mlx5_lag_dev(dev);
2452 	res  = ldev && __mlx5_lag_is_roce(ldev);
2453 	spin_unlock_irqrestore(&lag_lock, flags);
2454 
2455 	return res;
2456 }
2457 EXPORT_SYMBOL(mlx5_lag_is_roce);
2458 
2459 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
2460 {
2461 	struct mlx5_lag *ldev;
2462 	unsigned long flags;
2463 	bool res;
2464 
2465 	spin_lock_irqsave(&lag_lock, flags);
2466 	ldev = mlx5_lag_dev(dev);
2467 	res  = ldev && (__mlx5_lag_is_active(ldev) ||
2468 			__mlx5_lag_is_sd_active(ldev, dev));
2469 	spin_unlock_irqrestore(&lag_lock, flags);
2470 
2471 	return res;
2472 }
2473 EXPORT_SYMBOL(mlx5_lag_is_active);
2474 
2475 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
2476 {
2477 	struct mlx5_lag *ldev;
2478 	unsigned long flags;
2479 	bool res = 0;
2480 
2481 	spin_lock_irqsave(&lag_lock, flags);
2482 	ldev = mlx5_lag_dev(dev);
2483 	if (ldev)
2484 		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
2485 	spin_unlock_irqrestore(&lag_lock, flags);
2486 
2487 	return res;
2488 }
2489 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
2490 
2491 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
2492 {
2493 	struct mlx5_lag *ldev;
2494 	unsigned long flags;
2495 	struct lag_func *pf;
2496 	bool res = false;
2497 	int idx;
2498 
2499 	spin_lock_irqsave(&lag_lock, flags);
2500 	ldev = mlx5_lag_dev(dev);
2501 	if (ldev) {
2502 		u32 filter;
2503 
2504 		filter = mlx5_lag_get_filter(ldev, dev);
2505 		idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1,
2506 							   filter);
2507 		if ((__mlx5_lag_is_active(ldev) ||
2508 		     __mlx5_lag_is_sd_active(ldev, dev)) && idx >= 0) {
2509 			pf = mlx5_lag_pf(ldev, idx);
2510 			res = pf && dev == pf->dev;
2511 		}
2512 	}
2513 	spin_unlock_irqrestore(&lag_lock, flags);
2514 
2515 	return res;
2516 }
2517 EXPORT_SYMBOL(mlx5_lag_is_master);
2518 
2519 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
2520 {
2521 	struct mlx5_lag *ldev;
2522 	unsigned long flags;
2523 	bool res;
2524 
2525 	spin_lock_irqsave(&lag_lock, flags);
2526 	ldev = mlx5_lag_dev(dev);
2527 	res  = ldev && __mlx5_lag_is_sriov(ldev);
2528 	spin_unlock_irqrestore(&lag_lock, flags);
2529 
2530 	return res;
2531 }
2532 EXPORT_SYMBOL(mlx5_lag_is_sriov);
2533 
2534 bool mlx5_lag_is_sd(struct mlx5_core_dev *dev)
2535 {
2536 	struct mlx5_lag *ldev;
2537 	unsigned long flags;
2538 	bool res;
2539 
2540 	spin_lock_irqsave(&lag_lock, flags);
2541 	ldev = mlx5_lag_dev(dev);
2542 	res  = ldev && __mlx5_lag_is_sd(ldev, dev);
2543 	spin_unlock_irqrestore(&lag_lock, flags);
2544 
2545 	return res;
2546 }
2547 
2548 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
2549 {
2550 	struct mlx5_lag *ldev;
2551 	unsigned long flags;
2552 	bool res = false;
2553 
2554 	spin_lock_irqsave(&lag_lock, flags);
2555 	ldev = mlx5_lag_dev(dev);
2556 	if (ldev) {
2557 		res = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB,
2558 			       &ldev->mode_flags);
2559 		if (__mlx5_lag_is_sd(ldev, dev) && !__mlx5_lag_is_active(ldev))
2560 			res = __mlx5_lag_is_sd_active(ldev, dev);
2561 	}
2562 	spin_unlock_irqrestore(&lag_lock, flags);
2563 
2564 	return res;
2565 }
2566 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
2567 
/* Block LAG mode changes on the lag device of @dev and tear down whatever
 * is currently active.
 *
 * The function bumps ldev->mode_changes_in_progress (undone by
 * mlx5_lag_enable_change()) and, if the LAG is active, disables it with
 * the MPESW-specific or the generic disable routine. For a device that has
 * an SD devcom component it then destroys the shared FDB of the device's
 * group, if that is marked active.
 *
 * Does nothing when @dev has no lag device.
 */
void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
{
	struct mlx5_devcom_comp_dev *sd_devcom = mlx5_sd_get_devcom(dev);
	struct mlx5_core_dev *primary = dev;
	struct mlx5_lag *ldev;
	struct lag_func *pf;
	bool mpesw;
	int i;

	ldev = mlx5_lag_dev(dev);
	if (!ldev)
		return;

	/* Resolve the device whose HCA devcom lock is taken below: the SD
	 * primary when one is reported, otherwise @dev itself.
	 */
	if (sd_devcom) {
		mlx5_devcom_comp_lock(sd_devcom);
		primary = mlx5_sd_get_primary(dev) ?: dev;
		mlx5_devcom_comp_unlock(sd_devcom);
	}
	/* Lock order: HCA devcom component, then (MPESW only) the SD
	 * devcoms, then ldev->lock. Released in reverse order below.
	 */
	mlx5_devcom_comp_lock(primary->priv.hca_devcom_comp);
	/* Sampled once so that the SD devcoms lock and unlock are paired on
	 * the same decision even if the mode changes while disabling.
	 */
	mpesw = ldev->mode == MLX5_LAG_MODE_MPESW;
	if (mpesw)
		mlx5_mpesw_sd_devcoms_lock(ldev);
	mutex_lock(&ldev->lock);

	ldev->mode_changes_in_progress++;
	if (__mlx5_lag_is_active(ldev)) {
		if (ldev->mode == MLX5_LAG_MODE_MPESW)
			mlx5_lag_disable_mpesw(ldev);
		else
			mlx5_disable_lag(ldev);
	}

	mutex_unlock(&ldev->lock);
	if (mpesw)
		mlx5_mpesw_sd_devcoms_unlock(ldev);
	mlx5_devcom_comp_unlock(primary->priv.hca_devcom_comp);

	if (!sd_devcom)
		return;

	/* Teardown SD shared FDB for this device's group if active */
	mlx5_devcom_comp_lock(sd_devcom);
	mutex_lock(&ldev->lock);
	mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) {
		pf = mlx5_lag_pf(ldev, i);
		/* Only the entry of @dev matters, and only when it is marked
		 * as having an active SD FDB.
		 */
		if (pf->dev == dev && pf->sd_fdb_active) {
			mlx5_lag_shared_fdb_destroy(ldev, pf->group_id);
			break;
		}
	}
	mutex_unlock(&ldev->lock);
	mlx5_devcom_comp_unlock(sd_devcom);
}
2621 
2622 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
2623 {
2624 	struct mlx5_lag *ldev;
2625 
2626 	ldev = mlx5_lag_dev(dev);
2627 	if (!ldev)
2628 		return;
2629 
2630 	mutex_lock(&ldev->lock);
2631 	ldev->mode_changes_in_progress--;
2632 	mutex_unlock(&ldev->lock);
2633 	mlx5_queue_bond_work(ldev, 0);
2634 }
2635 
2636 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
2637 			   struct net_device *slave)
2638 {
2639 	struct mlx5_lag *ldev;
2640 	unsigned long flags;
2641 	struct lag_func *pf;
2642 	u8 port = 0;
2643 	int i;
2644 
2645 	spin_lock_irqsave(&lag_lock, flags);
2646 	ldev = mlx5_lag_dev(dev);
2647 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
2648 		goto unlock;
2649 
2650 	mlx5_ldev_for_each(i, 0, ldev) {
2651 		pf = mlx5_lag_pf(ldev, i);
2652 		if (pf->netdev == slave) {
2653 			port = i;
2654 			break;
2655 		}
2656 	}
2657 
2658 	port = ldev->v2p_map[port * ldev->buckets];
2659 
2660 unlock:
2661 	spin_unlock_irqrestore(&lag_lock, flags);
2662 	return port;
2663 }
2664 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
2665 
2666 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
2667 {
2668 	struct mlx5_lag *ldev;
2669 
2670 	ldev = mlx5_lag_dev(dev);
2671 	if (!ldev)
2672 		return 0;
2673 
2674 	return ldev->ports;
2675 }
2676 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
2677 
2678 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
2679 {
2680 	struct mlx5_core_dev *peer_dev = NULL;
2681 	struct mlx5_lag *ldev;
2682 	unsigned long flags;
2683 	struct lag_func *pf;
2684 	int idx;
2685 
2686 	spin_lock_irqsave(&lag_lock, flags);
2687 	ldev = mlx5_lag_dev(dev);
2688 	if (!ldev)
2689 		goto unlock;
2690 
2691 	if (*i == MLX5_MAX_PORTS)
2692 		goto unlock;
2693 	mlx5_lag_for_each(idx, *i, ldev, mlx5_lag_get_filter(ldev, dev)) {
2694 		pf = mlx5_lag_pf(ldev, idx);
2695 		if (pf->dev != dev)
2696 			break;
2697 	}
2698 
2699 	if (idx == MLX5_MAX_PORTS) {
2700 		*i = idx;
2701 		goto unlock;
2702 	}
2703 	*i = idx + 1;
2704 
2705 	pf = mlx5_lag_pf(ldev, idx);
2706 	peer_dev = pf->dev;
2707 
2708 unlock:
2709 	spin_unlock_irqrestore(&lag_lock, flags);
2710 	return peer_dev;
2711 }
2712 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
2713 
2714 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
2715 				 u64 *values,
2716 				 int num_counters,
2717 				 size_t *offsets)
2718 {
2719 	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
2720 	struct mlx5_core_dev **mdev;
2721 	int ret = 0, i, j, idx = 0;
2722 	struct mlx5_lag *ldev;
2723 	unsigned long flags;
2724 	struct lag_func *pf;
2725 	int num_ports;
2726 	void *out;
2727 
2728 	out = kvzalloc(outlen, GFP_KERNEL);
2729 	if (!out)
2730 		return -ENOMEM;
2731 
2732 	mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
2733 	if (!mdev) {
2734 		ret = -ENOMEM;
2735 		goto free_out;
2736 	}
2737 
2738 	memset(values, 0, sizeof(*values) * num_counters);
2739 
2740 	spin_lock_irqsave(&lag_lock, flags);
2741 	ldev = mlx5_lag_dev(dev);
2742 	if (ldev && __mlx5_lag_is_active(ldev)) {
2743 		num_ports = ldev->ports;
2744 		mlx5_ldev_for_each(i, 0, ldev) {
2745 			pf = mlx5_lag_pf(ldev, i);
2746 			mdev[idx++] = pf->dev;
2747 		}
2748 	} else {
2749 		num_ports = 1;
2750 		mdev[MLX5_LAG_P1] = dev;
2751 	}
2752 	spin_unlock_irqrestore(&lag_lock, flags);
2753 
2754 	for (i = 0; i < num_ports; ++i) {
2755 		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
2756 
2757 		MLX5_SET(query_cong_statistics_in, in, opcode,
2758 			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
2759 		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
2760 					  out);
2761 		if (ret)
2762 			goto free_mdev;
2763 
2764 		for (j = 0; j < num_counters; ++j)
2765 			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
2766 	}
2767 
2768 free_mdev:
2769 	kvfree(mdev);
2770 free_out:
2771 	kvfree(out);
2772 	return ret;
2773 }
2774 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
2775