xref: /linux/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c (revision 68993ced0f618e36cf33388f1e50223e5e6e78cc)
1 /*
2  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include <linux/mlx5/lag.h>
39 #include "lib/mlx5.h"
40 #include "lib/devcom.h"
41 #include "mlx5_core.h"
42 #include "eswitch.h"
43 #include "esw/acl/ofld.h"
44 #include "lag.h"
45 #include "mp.h"
46 #include "mpesw.h"
47 
48 
49 /* General purpose, use for short periods of time.
50  * Beware of lock dependencies (preferably, no locks should be acquired
51  * under it).
52  */
53 static DEFINE_SPINLOCK(lag_lock);
54 
get_port_sel_mode(enum mlx5_lag_mode mode,unsigned long flags)55 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
56 {
57 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
58 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
59 
60 	if (mode == MLX5_LAG_MODE_MPESW)
61 		return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
62 
63 	return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
64 }
65 
lag_active_port_bits(struct mlx5_lag * ldev)66 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
67 {
68 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
69 	u8 active_port = 0;
70 	int num_enabled;
71 	int idx;
72 
73 	mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
74 			      &num_enabled);
75 	for (idx = 0; idx < num_enabled; idx++)
76 		active_port |= BIT_MASK(enabled_ports[idx]);
77 
78 	return active_port;
79 }
80 
mlx5_cmd_create_lag(struct mlx5_core_dev * dev,struct mlx5_lag * ldev,int mode,unsigned long flags)81 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
82 			       int mode, unsigned long flags)
83 {
84 	bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
85 				     &flags);
86 	int port_sel_mode = get_port_sel_mode(mode, flags);
87 	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
88 	u8 *ports = ldev->v2p_map;
89 	int idx0, idx1;
90 	void *lag_ctx;
91 
92 	lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
93 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
94 	MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
95 	idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
96 	idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
97 
98 	if (idx0 < 0 || idx1 < 0)
99 		return -EINVAL;
100 
101 	switch (port_sel_mode) {
102 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
103 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
104 		MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
105 		break;
106 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
107 		if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
108 			break;
109 
110 		MLX5_SET(lagc, lag_ctx, active_port,
111 			 lag_active_port_bits(mlx5_lag_dev(dev)));
112 		break;
113 	default:
114 		break;
115 	}
116 	MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
117 
118 	return mlx5_cmd_exec_in(dev, create_lag, in);
119 }
120 
/* Execute MODIFY_LAG on @dev to update the two tx remap affinities from
 * @ports, indexed by the first and second LAG device indices.
 *
 * field_select is 0x1 here (presumably the tx remap affinity selector -
 * confirm against the firmware interface definition).
 *
 * Returns -EINVAL when either device index cannot be resolved, otherwise
 * the result of the command execution.
 */
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	int first = mlx5_lag_get_dev_index_by_seq(ldev, 0);
	int second = mlx5_lag_get_dev_index_by_seq(ldev, 1);
	void *lag_context;

	if (first < 0 || second < 0)
		return -EINVAL;

	lag_context = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x1);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_1, ports[first]);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_2, ports[second]);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}
141 
mlx5_cmd_create_vport_lag(struct mlx5_core_dev * dev)142 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
143 {
144 	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
145 
146 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
147 
148 	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
149 }
150 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
151 
mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev * dev)152 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
153 {
154 	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
155 
156 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
157 
158 	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
159 }
160 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
161 
/* Collect into @ports the indices of all LAG devices whose tracked netdev
 * state is either not tx-enabled or not link-up; the number of collected
 * entries is stored in @num_disabled.
 */
static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
				   u8 *ports, int *num_disabled)
{
	int count = 0;
	int i;

	mlx5_ldev_for_each(i, 0, ldev) {
		if (tracker->netdev_state[i].tx_enabled &&
		    tracker->netdev_state[i].link_up)
			continue;

		ports[count++] = i;
	}

	*num_disabled = count;
}
173 
/* Collect into @ports the indices of all LAG devices that are both
 * tx-enabled and link-up, storing the count in @num_enabled.
 *
 * When no device qualifies, fall back to reporting the disabled devices
 * instead (via mlx5_infer_tx_disabled()).
 */
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
			   u8 *ports, int *num_enabled)
{
	int count = 0;
	int i;

	mlx5_ldev_for_each(i, 0, ldev) {
		if (!tracker->netdev_state[i].tx_enabled ||
		    !tracker->netdev_state[i].link_up)
			continue;

		ports[count++] = i;
	}

	*num_enabled = count;
	if (!count)
		mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
}
188 
mlx5_lag_print_mapping(struct mlx5_core_dev * dev,struct mlx5_lag * ldev,struct lag_tracker * tracker,unsigned long flags)189 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
190 				   struct mlx5_lag *ldev,
191 				   struct lag_tracker *tracker,
192 				   unsigned long flags)
193 {
194 	char buf[MLX5_MAX_PORTS * 10 + 1] = {};
195 	u8 enabled_ports[MLX5_MAX_PORTS] = {};
196 	int written = 0;
197 	int num_enabled;
198 	int idx;
199 	int err;
200 	int i;
201 	int j;
202 
203 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
204 		mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
205 				      &num_enabled);
206 		for (i = 0; i < num_enabled; i++) {
207 			err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
208 			if (err != 3)
209 				return;
210 			written += err;
211 		}
212 		buf[written - 2] = 0;
213 		mlx5_core_info(dev, "lag map active ports: %s\n", buf);
214 	} else {
215 		mlx5_ldev_for_each(i, 0, ldev) {
216 			for (j  = 0; j < ldev->buckets; j++) {
217 				idx = i * ldev->buckets + j;
218 				err = scnprintf(buf + written, 10,
219 						" port %d:%d", i + 1, ldev->v2p_map[idx]);
220 				if (err != 9)
221 					return;
222 				written += err;
223 			}
224 		}
225 		mlx5_core_info(dev, "lag map:%s\n", buf);
226 	}
227 }
228 
229 static int mlx5_lag_netdev_event(struct notifier_block *this,
230 				 unsigned long event, void *ptr);
231 static void mlx5_do_bond_work(struct work_struct *work);
232 
mlx5_ldev_free(struct kref * ref)233 static void mlx5_ldev_free(struct kref *ref)
234 {
235 	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
236 	struct lag_func *pf;
237 	struct net *net;
238 	int i;
239 
240 	if (ldev->nb.notifier_call) {
241 		net = read_pnet(&ldev->net);
242 		unregister_netdevice_notifier_net(net, &ldev->nb);
243 	}
244 
245 	mlx5_ldev_for_each(i, 0, ldev) {
246 		pf = mlx5_lag_pf(ldev, i);
247 		if (pf->port_change_nb.nb.notifier_call) {
248 			struct mlx5_nb *nb = &pf->port_change_nb;
249 
250 			mlx5_eq_notifier_unregister(pf->dev, nb);
251 		}
252 		xa_erase(&ldev->pfs, i);
253 		kfree(pf);
254 	}
255 	xa_destroy(&ldev->pfs);
256 
257 	mlx5_lag_mp_cleanup(ldev);
258 	cancel_delayed_work_sync(&ldev->bond_work);
259 	cancel_work_sync(&ldev->speed_update_work);
260 	destroy_workqueue(ldev->wq);
261 	mutex_destroy(&ldev->lock);
262 	kfree(ldev);
263 }
264 
mlx5_ldev_put(struct mlx5_lag * ldev)265 static void mlx5_ldev_put(struct mlx5_lag *ldev)
266 {
267 	kref_put(&ldev->ref, mlx5_ldev_free);
268 }
269 
mlx5_ldev_get(struct mlx5_lag * ldev)270 static void mlx5_ldev_get(struct mlx5_lag *ldev)
271 {
272 	kref_get(&ldev->ref);
273 }
274 
mlx5_lag_dev_alloc(struct mlx5_core_dev * dev)275 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
276 {
277 	struct mlx5_lag *ldev;
278 	int err;
279 
280 	ldev = kzalloc_obj(*ldev);
281 	if (!ldev)
282 		return NULL;
283 
284 	ldev->wq = create_singlethread_workqueue("mlx5_lag");
285 	if (!ldev->wq) {
286 		kfree(ldev);
287 		return NULL;
288 	}
289 
290 	kref_init(&ldev->ref);
291 	mutex_init(&ldev->lock);
292 	xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
293 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
294 	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
295 
296 	ldev->nb.notifier_call = mlx5_lag_netdev_event;
297 	write_pnet(&ldev->net, mlx5_core_net(dev));
298 	if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
299 		ldev->nb.notifier_call = NULL;
300 		mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
301 	}
302 	ldev->mode = MLX5_LAG_MODE_NONE;
303 
304 	err = mlx5_lag_mp_init(ldev);
305 	if (err)
306 		mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
307 			      err);
308 
309 	ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
310 	ldev->buckets = 1;
311 
312 	return ldev;
313 }
314 
mlx5_lag_dev_get_netdev_idx(struct mlx5_lag * ldev,struct net_device * ndev)315 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
316 				struct net_device *ndev)
317 {
318 	struct lag_func *pf;
319 	int i;
320 
321 	mlx5_ldev_for_each(i, 0, ldev) {
322 		pf = mlx5_lag_pf(ldev, i);
323 		if (pf->netdev == ndev)
324 			return i;
325 	}
326 
327 	return -ENOENT;
328 }
329 
mlx5_lag_get_master_idx(struct mlx5_lag * ldev)330 static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
331 {
332 	unsigned long idx = 0;
333 	void *entry;
334 
335 	if (!ldev)
336 		return -ENOENT;
337 
338 	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
339 	if (!entry)
340 		return -ENOENT;
341 
342 	return (int)idx;
343 }
344 
mlx5_lag_get_dev_index_by_seq(struct mlx5_lag * ldev,int seq)345 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
346 {
347 	int master_idx, i, num = 0;
348 
349 	if (!ldev)
350 		return -ENOENT;
351 
352 	master_idx = mlx5_lag_get_master_idx(ldev);
353 
354 	/* If seq 0 is requested and there's a primary PF, return it */
355 	if (master_idx >= 0) {
356 		if (seq == 0)
357 			return master_idx;
358 		num++;
359 	}
360 
361 	mlx5_ldev_for_each(i, 0, ldev) {
362 		/* Skip the primary PF in the loop */
363 		if (i == master_idx)
364 			continue;
365 
366 		if (num == seq)
367 			return i;
368 		num++;
369 	}
370 	return -ENOENT;
371 }
372 
373 /* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
374  * sequence number in the LAG. Master is always 0, others numbered
375  * sequentially starting from 1.
376  */
mlx5_lag_get_dev_seq(struct mlx5_core_dev * dev)377 int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
378 {
379 	struct mlx5_lag *ldev = mlx5_lag_dev(dev);
380 	int master_idx, i, num = 1;
381 	struct lag_func *pf;
382 
383 	if (!ldev)
384 		return -ENOENT;
385 
386 	master_idx = mlx5_lag_get_master_idx(ldev);
387 	if (master_idx < 0)
388 		return -ENOENT;
389 
390 	pf = mlx5_lag_pf(ldev, master_idx);
391 	if (pf && pf->dev == dev)
392 		return 0;
393 
394 	mlx5_ldev_for_each(i, 0, ldev) {
395 		if (i == master_idx)
396 			continue;
397 		pf = mlx5_lag_pf(ldev, i);
398 		if (pf->dev == dev)
399 			return num;
400 		num++;
401 	}
402 	return -ENOENT;
403 }
404 EXPORT_SYMBOL(mlx5_lag_get_dev_seq);
405 
406 /* Devcom events for LAG master marking */
407 #define LAG_DEVCOM_PAIR		(0)
408 #define LAG_DEVCOM_UNPAIR	(1)
409 
mlx5_lag_mark_master(struct mlx5_lag * ldev)410 static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
411 {
412 	int lowest_dev_idx = INT_MAX;
413 	struct lag_func *pf;
414 	int master_xa_idx = -1;
415 	int dev_idx;
416 	int i;
417 
418 	mlx5_ldev_for_each(i, 0, ldev) {
419 		pf = mlx5_lag_pf(ldev, i);
420 		dev_idx = mlx5_get_dev_index(pf->dev);
421 		if (dev_idx < lowest_dev_idx) {
422 			lowest_dev_idx = dev_idx;
423 			master_xa_idx = i;
424 		}
425 	}
426 
427 	if (master_xa_idx >= 0)
428 		xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
429 }
430 
mlx5_lag_clear_master(struct mlx5_lag * ldev)431 static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
432 {
433 	unsigned long idx = 0;
434 	void *entry;
435 
436 	entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
437 	if (!entry)
438 		return;
439 
440 	xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
441 }
442 
443 /* Devcom event handler to manage LAG master marking */
mlx5_lag_devcom_event(int event,void * my_data,void * event_data)444 static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
445 {
446 	struct mlx5_core_dev *dev = my_data;
447 	struct mlx5_lag *ldev;
448 	int idx;
449 
450 	ldev = mlx5_lag_dev(dev);
451 	if (!ldev)
452 		return 0;
453 
454 	mutex_lock(&ldev->lock);
455 	switch (event) {
456 	case LAG_DEVCOM_PAIR:
457 		/* No need to mark more than once */
458 		idx = mlx5_lag_get_master_idx(ldev);
459 		if (idx >= 0)
460 			break;
461 		/* Check if all LAG ports are now registered */
462 		if (mlx5_lag_num_devs(ldev) == ldev->ports)
463 			mlx5_lag_mark_master(ldev);
464 		break;
465 
466 	case LAG_DEVCOM_UNPAIR:
467 		/* Clear master mark when a device is removed */
468 		mlx5_lag_clear_master(ldev);
469 		break;
470 	}
471 	mutex_unlock(&ldev->lock);
472 	return 0;
473 }
474 
mlx5_lag_num_devs(struct mlx5_lag * ldev)475 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
476 {
477 	int i, num = 0;
478 
479 	if (!ldev)
480 		return 0;
481 
482 	mlx5_ldev_for_each(i, 0, ldev) {
483 		(void)i;
484 		num++;
485 	}
486 	return num;
487 }
488 
mlx5_lag_num_netdevs(struct mlx5_lag * ldev)489 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
490 {
491 	struct lag_func *pf;
492 	int i, num = 0;
493 
494 	if (!ldev)
495 		return 0;
496 
497 	mlx5_ldev_for_each(i, 0, ldev) {
498 		pf = mlx5_lag_pf(ldev, i);
499 		if (pf->netdev)
500 			num++;
501 	}
502 	return num;
503 }
504 
__mlx5_lag_is_roce(struct mlx5_lag * ldev)505 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
506 {
507 	return ldev->mode == MLX5_LAG_MODE_ROCE;
508 }
509 
__mlx5_lag_is_sriov(struct mlx5_lag * ldev)510 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
511 {
512 	return ldev->mode == MLX5_LAG_MODE_SRIOV;
513 }
514 
515 /* Create a mapping between steering slots and active ports.
516  * As we have ldev->buckets slots per port first assume the native
517  * mapping should be used.
518  * If there are ports that are disabled fill the relevant slots
519  * with mapping that points to active ports.
520  */
mlx5_infer_tx_affinity_mapping(struct lag_tracker * tracker,struct mlx5_lag * ldev,u8 buckets,u8 * ports)521 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
522 					   struct mlx5_lag *ldev,
523 					   u8 buckets,
524 					   u8 *ports)
525 {
526 	int disabled[MLX5_MAX_PORTS] = {};
527 	int enabled[MLX5_MAX_PORTS] = {};
528 	int disabled_ports_num = 0;
529 	int enabled_ports_num = 0;
530 	int idx;
531 	u32 rand;
532 	int i;
533 	int j;
534 
535 	mlx5_ldev_for_each(i, 0, ldev) {
536 		if (tracker->netdev_state[i].tx_enabled &&
537 		    tracker->netdev_state[i].link_up)
538 			enabled[enabled_ports_num++] = i;
539 		else
540 			disabled[disabled_ports_num++] = i;
541 	}
542 
543 	/* Use native mapping by default where each port's buckets
544 	 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
545 	 * ports[] values are 1-indexed device indices for FW.
546 	 */
547 	mlx5_ldev_for_each(i, 0, ldev) {
548 		for (j = 0; j < buckets; j++) {
549 			idx = i * buckets + j;
550 			ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
551 		}
552 	}
553 
554 	/* If all ports are disabled/enabled keep native mapping */
555 	if (enabled_ports_num == ldev->ports ||
556 	    disabled_ports_num == ldev->ports)
557 		return;
558 
559 	/* Go over the disabled ports and for each assign a random active port */
560 	for (i = 0; i < disabled_ports_num; i++) {
561 		for (j = 0; j < buckets; j++) {
562 			int rand_xa_idx;
563 
564 			get_random_bytes(&rand, 4);
565 			rand_xa_idx = enabled[rand % enabled_ports_num];
566 			ports[disabled[i] * buckets + j] =
567 				mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
568 		}
569 	}
570 }
571 
mlx5_lag_has_drop_rule(struct mlx5_lag * ldev)572 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
573 {
574 	struct lag_func *pf;
575 	int i;
576 
577 	mlx5_ldev_for_each(i, 0, ldev) {
578 		pf = mlx5_lag_pf(ldev, i);
579 		if (pf->has_drop)
580 			return true;
581 	}
582 	return false;
583 }
584 
mlx5_lag_drop_rule_cleanup(struct mlx5_lag * ldev)585 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
586 {
587 	struct lag_func *pf;
588 	int i;
589 
590 	mlx5_ldev_for_each(i, 0, ldev) {
591 		pf = mlx5_lag_pf(ldev, i);
592 		if (!pf->has_drop)
593 			continue;
594 
595 		mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch,
596 							     MLX5_VPORT_UPLINK);
597 		pf->has_drop = false;
598 	}
599 }
600 
mlx5_lag_drop_rule_setup(struct mlx5_lag * ldev,struct lag_tracker * tracker)601 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
602 				     struct lag_tracker *tracker)
603 {
604 	u8 disabled_ports[MLX5_MAX_PORTS] = {};
605 	struct mlx5_core_dev *dev;
606 	struct lag_func *pf;
607 	int disabled_index;
608 	int num_disabled;
609 	int err;
610 	int i;
611 
612 	/* First delete the current drop rule so there won't be any dropped
613 	 * packets
614 	 */
615 	mlx5_lag_drop_rule_cleanup(ldev);
616 
617 	if (!ldev->tracker.has_inactive)
618 		return;
619 
620 	mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
621 
622 	for (i = 0; i < num_disabled; i++) {
623 		disabled_index = disabled_ports[i];
624 		pf = mlx5_lag_pf(ldev, disabled_index);
625 		dev = pf->dev;
626 		err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
627 								  MLX5_VPORT_UPLINK);
628 		if (!err)
629 			pf->has_drop = true;
630 		else
631 			mlx5_core_err(dev,
632 				      "Failed to create lag drop rule, error: %d", err);
633 	}
634 }
635 
/* Execute MODIFY_LAG on @dev to set the LAG context's active_port field to
 * @ports.
 *
 * field_select is 0x2 here (presumably the active_port selector - confirm
 * against the firmware interface definition).
 */
static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_context = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
	MLX5_SET(modify_lag_in, in, field_select, 0x2);
	MLX5_SET(lagc, lag_context, active_port, ports);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}
650 
/* Push a new port mapping to the hardware.
 *
 * Without the hash-based flag the mapping is applied with MODIFY_LAG.
 * With it, the port selection tables are updated and, if the device has the
 * port_select_flow_table_bypass capability, the active port mask is
 * refreshed as well.
 *
 * Returns -EINVAL when the first LAG device cannot be resolved, otherwise
 * the status of the last operation performed.
 */
static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	struct mlx5_core_dev *dev0;
	int err;

	if (first_idx < 0)
		return -EINVAL;

	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
	if (!test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
		return mlx5_cmd_modify_lag(dev0, ldev, ports);

	err = mlx5_lag_port_sel_modify(ldev, ports);
	if (err)
		return err;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
		return 0;

	return mlx5_cmd_modify_active_port(dev0, lag_active_port_bits(ldev));
}
674 
mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev * dev)675 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
676 {
677 	struct net_device *ndev = NULL;
678 	struct lag_func *pf;
679 	struct mlx5_lag *ldev;
680 	unsigned long flags;
681 	int i, last_idx;
682 
683 	spin_lock_irqsave(&lag_lock, flags);
684 	ldev = mlx5_lag_dev(dev);
685 
686 	if (!ldev)
687 		goto unlock;
688 
689 	mlx5_ldev_for_each(i, 0, ldev) {
690 		pf = mlx5_lag_pf(ldev, i);
691 		if (ldev->tracker.netdev_state[i].tx_enabled)
692 			ndev = pf->netdev;
693 	}
694 	if (!ndev) {
695 		last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
696 		if (last_idx < 0)
697 			goto unlock;
698 		pf = mlx5_lag_pf(ldev, last_idx);
699 		ndev = pf->netdev;
700 	}
701 
702 	dev_hold(ndev);
703 
704 unlock:
705 	spin_unlock_irqrestore(&lag_lock, flags);
706 
707 	return ndev;
708 }
709 
mlx5_modify_lag(struct mlx5_lag * ldev,struct lag_tracker * tracker)710 void mlx5_modify_lag(struct mlx5_lag *ldev,
711 		     struct lag_tracker *tracker)
712 {
713 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
714 	u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
715 	struct mlx5_core_dev *dev0;
716 	int idx;
717 	int err;
718 	int i;
719 	int j;
720 
721 	if (first_idx < 0)
722 		return;
723 
724 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
725 	mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
726 
727 	mlx5_ldev_for_each(i, 0, ldev) {
728 		for (j = 0; j < ldev->buckets; j++) {
729 			idx = i * ldev->buckets + j;
730 			if (ports[idx] == ldev->v2p_map[idx])
731 				continue;
732 			err = _mlx5_modify_lag(ldev, ports);
733 			if (err) {
734 				mlx5_core_err(dev0,
735 					      "Failed to modify LAG (%d)\n",
736 					      err);
737 				return;
738 			}
739 			memcpy(ldev->v2p_map, ports, sizeof(ports));
740 
741 			mlx5_lag_print_mapping(dev0, ldev, tracker,
742 					       ldev->mode_flags);
743 			break;
744 		}
745 	}
746 
747 	if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
748 		struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
749 
750 		if(!(ldev->mode == MLX5_LAG_MODE_ROCE))
751 			mlx5_lag_drop_rule_setup(ldev, tracker);
752 		/** Only sriov and roce lag should have tracker->tx_type set so
753 		 *  no need to check the mode
754 		 */
755 		blocking_notifier_call_chain(&dev0->priv.lag_nh,
756 					     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
757 					     ndev);
758 		dev_put(ndev);
759 	}
760 }
761 
mlx5_lag_set_port_sel_mode(struct mlx5_lag * ldev,enum mlx5_lag_mode mode,unsigned long * flags)762 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
763 				      enum mlx5_lag_mode mode,
764 				      unsigned long *flags)
765 {
766 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
767 	struct mlx5_core_dev *dev0;
768 
769 	if (first_idx < 0)
770 		return -EINVAL;
771 
772 	if (mode == MLX5_LAG_MODE_MPESW ||
773 	    mode == MLX5_LAG_MODE_MULTIPATH)
774 		return 0;
775 
776 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
777 
778 	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
779 		if (ldev->ports > 2)
780 			return -EINVAL;
781 		return 0;
782 	}
783 
784 	if (ldev->ports > 2)
785 		ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
786 
787 	set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
788 
789 	return 0;
790 }
791 
/* Compute the mode flags for activating a LAG in @mode.
 *
 * @flags is reset, the shared FDB flag follows @shared_fdb, and the native
 * FDB selection flag is set for shared FDB as well as for MPESW mode. The
 * port selection decision is delegated to mlx5_lag_set_port_sel_mode(),
 * whose status is returned. @tracker is not used here.
 */
static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	*flags = 0;

	if (shared_fdb)
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);

	if (shared_fdb || mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
}
807 
mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode,unsigned long flags)808 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
809 {
810 	int port_sel_mode = get_port_sel_mode(mode, flags);
811 
812 	switch (port_sel_mode) {
813 	case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
814 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
815 	case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
816 	default: return "invalid";
817 	}
818 }
819 
mlx5_lag_create_single_fdb(struct mlx5_lag * ldev)820 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
821 {
822 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
823 	struct mlx5_eswitch *master_esw;
824 	struct mlx5_core_dev *dev0;
825 	int i, j;
826 	int err;
827 
828 	if (master_idx < 0)
829 		return -EINVAL;
830 
831 	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
832 	master_esw = dev0->priv.eswitch;
833 	mlx5_ldev_for_each(i, 0, ldev) {
834 		struct mlx5_eswitch *slave_esw;
835 
836 		if (i == master_idx)
837 			continue;
838 
839 		slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
840 
841 		err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
842 							       slave_esw, ldev->ports);
843 		if (err)
844 			goto err;
845 	}
846 	return 0;
847 err:
848 	mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
849 		if (j == master_idx)
850 			continue;
851 		mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
852 							 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
853 	}
854 	return err;
855 }
856 
mlx5_create_lag(struct mlx5_lag * ldev,struct lag_tracker * tracker,enum mlx5_lag_mode mode,unsigned long flags)857 static int mlx5_create_lag(struct mlx5_lag *ldev,
858 			   struct lag_tracker *tracker,
859 			   enum mlx5_lag_mode mode,
860 			   unsigned long flags)
861 {
862 	int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
863 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
864 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
865 	struct mlx5_core_dev *dev0;
866 	int err;
867 
868 	if (first_idx < 0)
869 		return -EINVAL;
870 
871 	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
872 	if (tracker)
873 		mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
874 	mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
875 		       shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
876 
877 	err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
878 	if (err) {
879 		mlx5_core_err(dev0,
880 			      "Failed to create LAG (%d)\n",
881 			      err);
882 		return err;
883 	}
884 
885 	if (shared_fdb) {
886 		err = mlx5_lag_create_single_fdb(ldev);
887 		if (err)
888 			mlx5_core_err(dev0, "Can't enable single FDB mode\n");
889 		else
890 			mlx5_core_info(dev0, "Operation mode is single FDB\n");
891 	}
892 
893 	if (err) {
894 		MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
895 		if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
896 			mlx5_core_err(dev0,
897 				      "Failed to deactivate RoCE LAG; driver restart required\n");
898 	}
899 	BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
900 
901 	return err;
902 }
903 
/* Activate a LAG in @mode.
 *
 * Computes the mode flags, builds the tx affinity mapping (all modes except
 * MPESW), creates the port selection tables when hash-based selection is
 * used, and creates the hardware LAG. On failure of the last step the port
 * selection tables are destroyed again. For active-backup bonds outside
 * RoCE mode the drop rules are installed. ldev->mode and ldev->mode_flags
 * are updated only on success.
 *
 * Returns 0 on success, -EINVAL when the first LAG device cannot be
 * resolved, or the error of the failing step.
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	struct mlx5_core_dev *dev0;
	unsigned long flags = 0;
	bool hash_based;
	int first_idx;
	int err;

	first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	if (first_idx < 0)
		return -EINVAL;

	dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	hash_based = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags);

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
		if (hash_based) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err) {
		if (hash_based)
			mlx5_lag_port_sel_destroy(ldev);

		if (mode == MLX5_LAG_MODE_ROCE)
			mlx5_core_err(dev0,
				      "Failed to activate RoCE LAG\n");
		else
			mlx5_core_err(dev0,
				      "Failed to activate VF LAG\n"
				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
		return err;
	}

	if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
	    mode != MLX5_LAG_MODE_ROCE)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;

	return 0;
}
960 
mlx5_deactivate_lag(struct mlx5_lag * ldev)961 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
962 {
963 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
964 	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
965 	bool roce_lag = __mlx5_lag_is_roce(ldev);
966 	unsigned long flags = ldev->mode_flags;
967 	struct mlx5_eswitch *master_esw;
968 	struct mlx5_core_dev *dev0;
969 	int err;
970 	int i;
971 
972 	if (master_idx < 0)
973 		return -EINVAL;
974 
975 	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
976 	master_esw = dev0->priv.eswitch;
977 	ldev->mode = MLX5_LAG_MODE_NONE;
978 	ldev->mode_flags = 0;
979 	mlx5_lag_mp_reset(ldev);
980 
981 	if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
982 		mlx5_ldev_for_each(i, 0, ldev) {
983 			if (i == master_idx)
984 				continue;
985 			mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
986 								 mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
987 		}
988 		clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
989 	}
990 
991 	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
992 	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
993 	if (err) {
994 		if (roce_lag) {
995 			mlx5_core_err(dev0,
996 				      "Failed to deactivate RoCE LAG; driver restart required\n");
997 		} else {
998 			mlx5_core_err(dev0,
999 				      "Failed to deactivate VF LAG; driver restart required\n"
1000 				      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
1001 		}
1002 		return err;
1003 	}
1004 
1005 	if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
1006 		mlx5_lag_port_sel_destroy(ldev);
1007 		ldev->buckets = 1;
1008 	}
1009 	if (mlx5_lag_has_drop_rule(ldev))
1010 		mlx5_lag_drop_rule_cleanup(ldev);
1011 
1012 	return 0;
1013 }
1014 
mlx5_lag_check_prereq(struct mlx5_lag * ldev)1015 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
1016 {
1017 	int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1018 #ifdef CONFIG_MLX5_ESWITCH
1019 	struct mlx5_core_dev *dev;
1020 	u8 mode;
1021 #endif
1022 	struct lag_func *pf;
1023 	bool roce_support;
1024 	int i;
1025 
1026 	if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
1027 		return false;
1028 
1029 #ifdef CONFIG_MLX5_ESWITCH
1030 	mlx5_ldev_for_each(i, 0, ldev) {
1031 		pf = mlx5_lag_pf(ldev, i);
1032 		dev = pf->dev;
1033 		if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
1034 			return false;
1035 	}
1036 
1037 	pf = mlx5_lag_pf(ldev, master_idx);
1038 	dev = pf->dev;
1039 	mode = mlx5_eswitch_mode(dev);
1040 	mlx5_ldev_for_each(i, 0, ldev) {
1041 		pf = mlx5_lag_pf(ldev, i);
1042 		if (mlx5_eswitch_mode(pf->dev) != mode)
1043 			return false;
1044 	}
1045 
1046 #else
1047 	mlx5_ldev_for_each(i, 0, ldev) {
1048 		pf = mlx5_lag_pf(ldev, i);
1049 		if (mlx5_sriov_is_enabled(pf->dev))
1050 			return false;
1051 	}
1052 #endif
1053 	pf = mlx5_lag_pf(ldev, master_idx);
1054 	roce_support = mlx5_get_roce_state(pf->dev);
1055 	mlx5_ldev_for_each(i, 0, ldev) {
1056 		if (i == master_idx)
1057 			continue;
1058 		pf = mlx5_lag_pf(ldev, i);
1059 		if (mlx5_get_roce_state(pf->dev) != roce_support)
1060 			return false;
1061 	}
1062 
1063 	return true;
1064 }
1065 
mlx5_lag_add_devices(struct mlx5_lag * ldev)1066 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
1067 {
1068 	struct lag_func *pf;
1069 	int i;
1070 
1071 	mlx5_ldev_for_each(i, 0, ldev) {
1072 		pf = mlx5_lag_pf(ldev, i);
1073 		if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1074 			continue;
1075 
1076 		pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1077 		mlx5_rescan_drivers_locked(pf->dev);
1078 	}
1079 }
1080 
mlx5_lag_remove_devices(struct mlx5_lag * ldev)1081 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
1082 {
1083 	struct lag_func *pf;
1084 	int i;
1085 
1086 	mlx5_ldev_for_each(i, 0, ldev) {
1087 		pf = mlx5_lag_pf(ldev, i);
1088 		if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1089 			continue;
1090 
1091 		pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1092 		mlx5_rescan_drivers_locked(pf->dev);
1093 	}
1094 }
1095 
mlx5_disable_lag(struct mlx5_lag * ldev)1096 void mlx5_disable_lag(struct mlx5_lag *ldev)
1097 {
1098 	bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1099 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1100 	struct mlx5_core_dev *dev0;
1101 	bool roce_lag;
1102 	int err;
1103 	int i;
1104 
1105 	if (idx < 0)
1106 		return;
1107 
1108 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
1109 	roce_lag = __mlx5_lag_is_roce(ldev);
1110 
1111 	if (shared_fdb) {
1112 		mlx5_lag_remove_devices(ldev);
1113 	} else if (roce_lag) {
1114 		if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
1115 			dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1116 			mlx5_rescan_drivers_locked(dev0);
1117 		}
1118 		mlx5_ldev_for_each(i, 0, ldev) {
1119 			if (i == idx)
1120 				continue;
1121 			mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
1122 		}
1123 	}
1124 
1125 	err = mlx5_deactivate_lag(ldev);
1126 	if (err)
1127 		return;
1128 
1129 	if (shared_fdb || roce_lag)
1130 		mlx5_lag_add_devices(ldev);
1131 
1132 	if (shared_fdb)
1133 		mlx5_ldev_for_each(i, 0, ldev)
1134 			if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
1135 				mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1136 }
1137 
mlx5_lag_shared_fdb_supported(struct mlx5_lag * ldev)1138 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
1139 {
1140 	struct mlx5_core_dev *dev;
1141 	bool ret = false;
1142 	int idx;
1143 	int i;
1144 
1145 	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1146 	if (idx < 0)
1147 		return false;
1148 
1149 	mlx5_ldev_for_each(i, 0, ldev) {
1150 		if (i == idx)
1151 			continue;
1152 		dev = mlx5_lag_pf(ldev, i)->dev;
1153 		if (is_mdev_switchdev_mode(dev) &&
1154 		    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1155 		    MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
1156 		    MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
1157 		    mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
1158 		    MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1159 			continue;
1160 		return false;
1161 	}
1162 
1163 	dev = mlx5_lag_pf(ldev, idx)->dev;
1164 	if (is_mdev_switchdev_mode(dev) &&
1165 	    mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1166 	    mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
1167 	    MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
1168 	    mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1169 		ret = true;
1170 
1171 	return ret;
1172 }
1173 
mlx5_lag_is_roce_lag(struct mlx5_lag * ldev)1174 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
1175 {
1176 	bool roce_lag = true;
1177 	struct lag_func *pf;
1178 	int i;
1179 
1180 	mlx5_ldev_for_each(i, 0, ldev) {
1181 		pf = mlx5_lag_pf(ldev, i);
1182 		roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev);
1183 	}
1184 
1185 #ifdef CONFIG_MLX5_ESWITCH
1186 	mlx5_ldev_for_each(i, 0, ldev) {
1187 		pf = mlx5_lag_pf(ldev, i);
1188 		roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev);
1189 	}
1190 #endif
1191 
1192 	return roce_lag;
1193 }
1194 
/*
 * An already active LAG is modified when bonding is still requested and the
 * LAG is not in MPESW mode.
 */
static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	if (!do_bond || !__mlx5_lag_is_active(ldev))
		return false;

	return ldev->mode != MLX5_LAG_MODE_MPESW;
}
1200 
/*
 * An active LAG is disabled when bonding is no longer requested and the LAG
 * is not in MPESW mode.
 */
static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	if (do_bond || !__mlx5_lag_is_active(ldev))
		return false;

	return ldev->mode != MLX5_LAG_MODE_MPESW;
}
1206 
1207 #ifdef CONFIG_MLX5_ESWITCH
1208 static int
mlx5_lag_sum_devices_speed(struct mlx5_lag * ldev,u32 * sum_speed,int (* get_speed)(struct mlx5_core_dev *,u32 *))1209 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
1210 			   int (*get_speed)(struct mlx5_core_dev *, u32 *))
1211 {
1212 	struct mlx5_core_dev *pf_mdev;
1213 	struct lag_func *pf;
1214 	int pf_idx;
1215 	u32 speed;
1216 	int ret;
1217 
1218 	*sum_speed = 0;
1219 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1220 		pf = mlx5_lag_pf(ldev, pf_idx);
1221 		if (!pf)
1222 			continue;
1223 		pf_mdev = pf->dev;
1224 		if (!pf_mdev)
1225 			continue;
1226 
1227 		ret = get_speed(pf_mdev, &speed);
1228 		if (ret) {
1229 			mlx5_core_dbg(pf_mdev,
1230 				      "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
1231 				      get_speed, dev_name(pf_mdev->device),
1232 				      ret);
1233 			return ret;
1234 		}
1235 
1236 		*sum_speed += speed;
1237 	}
1238 
1239 	return 0;
1240 }
1241 
/* Sum the value of mlx5_port_max_linkspeed() over all devices of @ldev. */
static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
{
	int (*query)(struct mlx5_core_dev *, u32 *) = mlx5_port_max_linkspeed;

	return mlx5_lag_sum_devices_speed(ldev, max_speed, query);
}
1247 
/* Sum the value of mlx5_port_oper_linkspeed() over all devices of @ldev. */
static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
					   u32 *oper_speed)
{
	int (*query)(struct mlx5_core_dev *, u32 *) = mlx5_port_oper_linkspeed;

	return mlx5_lag_sum_devices_speed(ldev, oper_speed, query);
}
1254 
/*
 * Record @speed as the aggregated max TX speed of every non-uplink vport of
 * @mdev and, for vports that are enabled, program it through
 * mlx5_modify_vport_max_tx_speed().
 *
 * Nothing is done when the device has no eswitch or lacks the
 * esw_vport_state_max_tx_speed capability. A failure to program a vport is
 * only logged at debug level; the remaining vports are still processed.
 */
static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
						u32 speed)
{
	u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_vport *vport;
	unsigned long i;
	int ret;

	if (!esw)
		return;

	if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
		return;

	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport)
			continue;

		if (vport->vport == MLX5_VPORT_UPLINK)
			continue;

		/* Remembered even for disabled vports. */
		vport->agg_max_tx_speed = speed;

		if (!vport->enabled)
			continue;

		ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
						     vport->vport, true, speed);
		if (ret)
			/* speed is a u32, so it is printed with %u */
			mlx5_core_dbg(mdev,
				      "Failed to set vport %d speed %u, err=%d\n",
				      vport->vport, speed, ret);
	}
}
1290 
mlx5_lag_set_vports_agg_speed(struct mlx5_lag * ldev)1291 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
1292 {
1293 	struct mlx5_core_dev *mdev;
1294 	struct lag_func *pf;
1295 	u32 speed;
1296 	int pf_idx;
1297 
1298 	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
1299 		if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
1300 			return;
1301 	} else {
1302 		speed = ldev->tracker.bond_speed_mbps;
1303 		if (speed == SPEED_UNKNOWN)
1304 			return;
1305 	}
1306 
1307 	/* If speed is not set, use the sum of max speeds of all PFs */
1308 	if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
1309 		return;
1310 
1311 	speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1312 
1313 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1314 		pf = mlx5_lag_pf(ldev, pf_idx);
1315 		if (!pf)
1316 			continue;
1317 		mdev = pf->dev;
1318 		if (!mdev)
1319 			continue;
1320 
1321 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1322 	}
1323 }
1324 
mlx5_lag_reset_vports_speed(struct mlx5_lag * ldev)1325 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
1326 {
1327 	struct mlx5_core_dev *mdev;
1328 	struct lag_func *pf;
1329 	u32 speed;
1330 	int pf_idx;
1331 	int ret;
1332 
1333 	mlx5_ldev_for_each(pf_idx, 0, ldev) {
1334 		pf = mlx5_lag_pf(ldev, pf_idx);
1335 		if (!pf)
1336 			continue;
1337 		mdev = pf->dev;
1338 		if (!mdev)
1339 			continue;
1340 
1341 		ret = mlx5_port_oper_linkspeed(mdev, &speed);
1342 		if (ret) {
1343 			mlx5_core_dbg(mdev,
1344 				      "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
1345 				      dev_name(mdev->device), ret);
1346 			continue;
1347 		}
1348 
1349 		speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1350 		mlx5_lag_modify_device_vports_speed(mdev, speed);
1351 	}
1352 }
1353 #endif
1354 
mlx5_do_bond(struct mlx5_lag * ldev)1355 static void mlx5_do_bond(struct mlx5_lag *ldev)
1356 {
1357 	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1358 	struct lag_tracker tracker = { };
1359 	struct mlx5_core_dev *dev0;
1360 	struct net_device *ndev;
1361 	bool do_bond, roce_lag;
1362 	int err;
1363 	int i;
1364 
1365 	if (idx < 0)
1366 		return;
1367 
1368 	dev0 = mlx5_lag_pf(ldev, idx)->dev;
1369 	if (!mlx5_lag_is_ready(ldev)) {
1370 		do_bond = false;
1371 	} else {
1372 		/* VF LAG is in multipath mode, ignore bond change requests */
1373 		if (mlx5_lag_is_multipath(dev0))
1374 			return;
1375 
1376 		tracker = ldev->tracker;
1377 
1378 		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1379 	}
1380 
1381 	if (do_bond && !__mlx5_lag_is_active(ldev)) {
1382 		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1383 
1384 		roce_lag = mlx5_lag_is_roce_lag(ldev);
1385 
1386 		if (shared_fdb || roce_lag)
1387 			mlx5_lag_remove_devices(ldev);
1388 
1389 		err = mlx5_activate_lag(ldev, &tracker,
1390 					roce_lag ? MLX5_LAG_MODE_ROCE :
1391 						   MLX5_LAG_MODE_SRIOV,
1392 					shared_fdb);
1393 		if (err) {
1394 			if (shared_fdb || roce_lag)
1395 				mlx5_lag_add_devices(ldev);
1396 			if (shared_fdb) {
1397 				mlx5_ldev_for_each(i, 0, ldev)
1398 					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1399 			}
1400 
1401 			return;
1402 		}
1403 
1404 		if (roce_lag) {
1405 			struct mlx5_core_dev *dev;
1406 
1407 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1408 			mlx5_rescan_drivers_locked(dev0);
1409 			mlx5_ldev_for_each(i, 0, ldev) {
1410 				if (i == idx)
1411 					continue;
1412 				dev = mlx5_lag_pf(ldev, i)->dev;
1413 				if (mlx5_get_roce_state(dev))
1414 					mlx5_nic_vport_enable_roce(dev);
1415 			}
1416 		} else if (shared_fdb) {
1417 			int i;
1418 
1419 			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1420 			mlx5_rescan_drivers_locked(dev0);
1421 
1422 			mlx5_ldev_for_each(i, 0, ldev) {
1423 				err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1424 				if (err)
1425 					break;
1426 			}
1427 
1428 			if (err) {
1429 				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1430 				mlx5_rescan_drivers_locked(dev0);
1431 				mlx5_deactivate_lag(ldev);
1432 				mlx5_lag_add_devices(ldev);
1433 				mlx5_ldev_for_each(i, 0, ldev)
1434 					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1435 				mlx5_core_err(dev0, "Failed to enable lag\n");
1436 				return;
1437 			}
1438 		}
1439 		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1440 			ndev = mlx5_lag_active_backup_get_netdev(dev0);
1441 			/** Only sriov and roce lag should have tracker->TX_type
1442 			 *  set so no need to check the mode
1443 			 */
1444 			blocking_notifier_call_chain(&dev0->priv.lag_nh,
1445 						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1446 						     ndev);
1447 			dev_put(ndev);
1448 		}
1449 		mlx5_lag_set_vports_agg_speed(ldev);
1450 	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1451 		mlx5_modify_lag(ldev, &tracker);
1452 		mlx5_lag_set_vports_agg_speed(ldev);
1453 	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1454 		mlx5_lag_reset_vports_speed(ldev);
1455 		mlx5_disable_lag(ldev);
1456 	}
1457 }
1458 
1459 /* The last mdev to unregister will destroy the workqueue before removing the
1460  * devcom component, and as all the mdevs use the same devcom component we are
1461  * guaranteed that the devcom is valid while the calling work is running.
1462  */
mlx5_lag_get_devcom_comp(struct mlx5_lag * ldev)1463 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1464 {
1465 	struct mlx5_devcom_comp_dev *devcom = NULL;
1466 	struct lag_func *pf;
1467 	int i;
1468 
1469 	mutex_lock(&ldev->lock);
1470 	i = mlx5_get_next_ldev_func(ldev, 0);
1471 	if (i < MLX5_MAX_PORTS) {
1472 		pf = mlx5_lag_pf(ldev, i);
1473 		devcom = pf->dev->priv.hca_devcom_comp;
1474 	}
1475 	mutex_unlock(&ldev->lock);
1476 	return devcom;
1477 }
1478 
mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev * dev,struct mlx5_flow_table_attr * ft_attr,struct mlx5_lag * ldev)1479 static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
1480 				     struct mlx5_flow_table_attr *ft_attr,
1481 				     struct mlx5_lag *ldev)
1482 {
1483 #ifdef CONFIG_MLX5_ESWITCH
1484 	struct mlx5_flow_namespace *ns;
1485 	struct mlx5_flow_group *fg;
1486 	int err;
1487 
1488 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1489 	if (!ns)
1490 		return 0;
1491 
1492 	ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
1493 	if (IS_ERR(ldev->lag_demux_ft))
1494 		return PTR_ERR(ldev->lag_demux_ft);
1495 
1496 	fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
1497 					  ldev->lag_demux_ft);
1498 	if (IS_ERR(fg)) {
1499 		err = PTR_ERR(fg);
1500 		mlx5_destroy_flow_table(ldev->lag_demux_ft);
1501 		ldev->lag_demux_ft = NULL;
1502 		return err;
1503 	}
1504 
1505 	ldev->lag_demux_fg = fg;
1506 	return 0;
1507 #else
1508 	return -EOPNOTSUPP;
1509 #endif
1510 }
1511 
mlx5_lag_demux_fw_init(struct mlx5_core_dev * dev,struct mlx5_flow_table_attr * ft_attr,struct mlx5_lag * ldev)1512 static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
1513 				  struct mlx5_flow_table_attr *ft_attr,
1514 				  struct mlx5_lag *ldev)
1515 {
1516 	struct mlx5_flow_namespace *ns;
1517 	int err;
1518 
1519 	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1520 	if (!ns)
1521 		return 0;
1522 
1523 	ldev->lag_demux_fg = NULL;
1524 	ft_attr->max_fte = 1;
1525 	ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
1526 	if (IS_ERR(ldev->lag_demux_ft)) {
1527 		err = PTR_ERR(ldev->lag_demux_ft);
1528 		ldev->lag_demux_ft = NULL;
1529 		return err;
1530 	}
1531 
1532 	return 0;
1533 }
1534 
mlx5_lag_demux_init(struct mlx5_core_dev * dev,struct mlx5_flow_table_attr * ft_attr)1535 int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
1536 			struct mlx5_flow_table_attr *ft_attr)
1537 {
1538 	struct mlx5_lag *ldev;
1539 
1540 	if (!ft_attr)
1541 		return -EINVAL;
1542 
1543 	ldev = mlx5_lag_dev(dev);
1544 	if (!ldev)
1545 		return -ENODEV;
1546 
1547 	xa_init(&ldev->lag_demux_rules);
1548 
1549 	if (mlx5_get_sd(dev))
1550 		return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);
1551 
1552 	return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
1553 }
1554 EXPORT_SYMBOL(mlx5_lag_demux_init);
1555 
mlx5_lag_demux_cleanup(struct mlx5_core_dev * dev)1556 void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
1557 {
1558 	struct mlx5_flow_handle *rule;
1559 	struct mlx5_lag *ldev;
1560 	unsigned long vport_num;
1561 
1562 	ldev = mlx5_lag_dev(dev);
1563 	if (!ldev)
1564 		return;
1565 
1566 	xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
1567 		mlx5_del_flow_rules(rule);
1568 	xa_destroy(&ldev->lag_demux_rules);
1569 
1570 	if (ldev->lag_demux_fg)
1571 		mlx5_destroy_flow_group(ldev->lag_demux_fg);
1572 	if (ldev->lag_demux_ft)
1573 		mlx5_destroy_flow_table(ldev->lag_demux_ft);
1574 	ldev->lag_demux_fg = NULL;
1575 	ldev->lag_demux_ft = NULL;
1576 }
1577 EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
1578 
/*
 * Create a LAG demux rule for @vport_num and store it under @index.
 *
 * Nothing is done when @vport_dev has no lag device, when no demux flow
 * group exists, or when a rule is already stored under @index.
 *
 * Return: 0 on success or when nothing had to be done, a negative errno when
 * the rule cannot be created or stored (the rule is deleted in the latter
 * case).
 */
int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
			    int index)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(vport_dev);
	struct mlx5_flow_handle *handle;
	int ret;

	if (!ldev)
		return 0;
	if (!ldev->lag_demux_fg)
		return 0;
	if (xa_load(&ldev->lag_demux_rules, index))
		return 0;

	handle = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
						vport_num, ldev->lag_demux_ft);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		mlx5_core_warn(vport_dev,
			       "Failed to create LAG demux rule for vport %u, err %d\n",
			       vport_num, ret);
		return ret;
	}

	ret = xa_err(xa_store(&ldev->lag_demux_rules, index, handle,
			      GFP_KERNEL));
	if (!ret)
		return 0;

	mlx5_del_flow_rules(handle);
	mlx5_core_warn(vport_dev,
		       "Failed to store LAG demux rule for vport %u, err %d\n",
		       vport_num, ret);

	return ret;
}
EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
1615 
mlx5_lag_demux_rule_del(struct mlx5_core_dev * dev,int index)1616 void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
1617 {
1618 	struct mlx5_flow_handle *rule;
1619 	struct mlx5_lag *ldev;
1620 
1621 	ldev = mlx5_lag_dev(dev);
1622 	if (!ldev || !ldev->lag_demux_fg)
1623 		return;
1624 
1625 	rule = xa_erase(&ldev->lag_demux_rules, index);
1626 	if (rule)
1627 		mlx5_del_flow_rules(rule);
1628 }
1629 EXPORT_SYMBOL(mlx5_lag_demux_rule_del);
1630 
mlx5_queue_bond_work(struct mlx5_lag * ldev,unsigned long delay)1631 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1632 {
1633 	queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1634 }
1635 
mlx5_do_bond_work(struct work_struct * work)1636 static void mlx5_do_bond_work(struct work_struct *work)
1637 {
1638 	struct delayed_work *delayed_work = to_delayed_work(work);
1639 	struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1640 					     bond_work);
1641 	struct mlx5_devcom_comp_dev *devcom;
1642 	int status;
1643 
1644 	devcom = mlx5_lag_get_devcom_comp(ldev);
1645 	if (!devcom)
1646 		return;
1647 
1648 	status = mlx5_devcom_comp_trylock(devcom);
1649 	if (!status) {
1650 		mlx5_queue_bond_work(ldev, HZ);
1651 		return;
1652 	}
1653 
1654 	mutex_lock(&ldev->lock);
1655 	if (ldev->mode_changes_in_progress) {
1656 		mutex_unlock(&ldev->lock);
1657 		mlx5_devcom_comp_unlock(devcom);
1658 		mlx5_queue_bond_work(ldev, HZ);
1659 		return;
1660 	}
1661 
1662 	mlx5_do_bond(ldev);
1663 	mutex_unlock(&ldev->lock);
1664 	mlx5_devcom_comp_unlock(devcom);
1665 }
1666 
mlx5_handle_changeupper_event(struct mlx5_lag * ldev,struct lag_tracker * tracker,struct netdev_notifier_changeupper_info * info)1667 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1668 					 struct lag_tracker *tracker,
1669 					 struct netdev_notifier_changeupper_info *info)
1670 {
1671 	struct net_device *upper = info->upper_dev, *ndev_tmp;
1672 	struct netdev_lag_upper_info *lag_upper_info = NULL;
1673 	bool is_bonded, is_in_lag, mode_supported;
1674 	bool has_inactive = 0;
1675 	struct lag_func *pf;
1676 	struct slave *slave;
1677 	u8 bond_status = 0;
1678 	int num_slaves = 0;
1679 	int changed = 0;
1680 	int i, idx = -1;
1681 
1682 	if (!netif_is_lag_master(upper))
1683 		return 0;
1684 
1685 	if (info->linking)
1686 		lag_upper_info = info->upper_info;
1687 
1688 	/* The event may still be of interest if the slave does not belong to
1689 	 * us, but is enslaved to a master which has one or more of our netdevs
1690 	 * as slaves (e.g., if a new slave is added to a master that bonds two
1691 	 * of our netdevs, we should unbond).
1692 	 */
1693 	rcu_read_lock();
1694 	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1695 		mlx5_ldev_for_each(i, 0, ldev) {
1696 			pf = mlx5_lag_pf(ldev, i);
1697 			if (pf->netdev == ndev_tmp) {
1698 				idx++;
1699 				break;
1700 			}
1701 		}
1702 		if (i < MLX5_MAX_PORTS) {
1703 			slave = bond_slave_get_rcu(ndev_tmp);
1704 			if (slave)
1705 				has_inactive |= bond_is_slave_inactive(slave);
1706 			bond_status |= (1 << idx);
1707 		}
1708 
1709 		num_slaves++;
1710 	}
1711 	rcu_read_unlock();
1712 
1713 	/* None of this lagdev's netdevs are slaves of this master. */
1714 	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1715 		return 0;
1716 
1717 	if (lag_upper_info) {
1718 		tracker->tx_type = lag_upper_info->tx_type;
1719 		tracker->hash_type = lag_upper_info->hash_type;
1720 	}
1721 
1722 	tracker->has_inactive = has_inactive;
1723 	/* Determine bonding status:
1724 	 * A device is considered bonded if both its physical ports are slaves
1725 	 * of the same lag master, and only them.
1726 	 */
1727 	is_in_lag = num_slaves == ldev->ports &&
1728 		bond_status == GENMASK(ldev->ports - 1, 0);
1729 
1730 	/* Lag mode must be activebackup or hash. */
1731 	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1732 			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1733 
1734 	is_bonded = is_in_lag && mode_supported;
1735 	if (tracker->is_bonded != is_bonded) {
1736 		tracker->is_bonded = is_bonded;
1737 		changed = 1;
1738 	}
1739 
1740 	if (!is_in_lag)
1741 		return changed;
1742 
1743 	if (!mlx5_lag_is_ready(ldev))
1744 		NL_SET_ERR_MSG_MOD(info->info.extack,
1745 				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
1746 	else if (!mode_supported)
1747 		NL_SET_ERR_MSG_MOD(info->info.extack,
1748 				   "Can't activate LAG offload, TX type isn't supported");
1749 
1750 	return changed;
1751 }
1752 
mlx5_handle_changelowerstate_event(struct mlx5_lag * ldev,struct lag_tracker * tracker,struct net_device * ndev,struct netdev_notifier_changelowerstate_info * info)1753 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1754 					      struct lag_tracker *tracker,
1755 					      struct net_device *ndev,
1756 					      struct netdev_notifier_changelowerstate_info *info)
1757 {
1758 	struct netdev_lag_lower_state_info *lag_lower_info;
1759 	int idx;
1760 
1761 	if (!netif_is_lag_port(ndev))
1762 		return 0;
1763 
1764 	idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1765 	if (idx < 0)
1766 		return 0;
1767 
1768 	/* This information is used to determine virtual to physical
1769 	 * port mapping.
1770 	 */
1771 	lag_lower_info = info->lower_state_info;
1772 	if (!lag_lower_info)
1773 		return 0;
1774 
1775 	tracker->netdev_state[idx] = *lag_lower_info;
1776 
1777 	return 1;
1778 }
1779 
mlx5_handle_changeinfodata_event(struct mlx5_lag * ldev,struct lag_tracker * tracker,struct net_device * ndev)1780 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1781 					    struct lag_tracker *tracker,
1782 					    struct net_device *ndev)
1783 {
1784 	struct net_device *ndev_tmp;
1785 	struct slave *slave;
1786 	bool has_inactive = 0;
1787 	int idx;
1788 
1789 	if (!netif_is_lag_master(ndev))
1790 		return 0;
1791 
1792 	rcu_read_lock();
1793 	for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1794 		idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1795 		if (idx < 0)
1796 			continue;
1797 
1798 		slave = bond_slave_get_rcu(ndev_tmp);
1799 		if (slave)
1800 			has_inactive |= bond_is_slave_inactive(slave);
1801 	}
1802 	rcu_read_unlock();
1803 
1804 	if (tracker->has_inactive == has_inactive)
1805 		return 0;
1806 
1807 	tracker->has_inactive = has_inactive;
1808 
1809 	return 1;
1810 }
1811 
mlx5_lag_update_tracker_speed(struct lag_tracker * tracker,struct net_device * ndev)1812 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
1813 					  struct net_device *ndev)
1814 {
1815 	struct ethtool_link_ksettings lksettings;
1816 	struct net_device *bond_dev;
1817 	int err;
1818 
1819 	if (netif_is_lag_master(ndev))
1820 		bond_dev = ndev;
1821 	else
1822 		bond_dev = netdev_master_upper_dev_get(ndev);
1823 
1824 	if (!bond_dev) {
1825 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
1826 		return;
1827 	}
1828 
1829 	err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
1830 	if (err) {
1831 		netdev_dbg(bond_dev,
1832 			   "Failed to get speed for bond dev %s, err=%d\n",
1833 			   bond_dev->name, err);
1834 		tracker->bond_speed_mbps = SPEED_UNKNOWN;
1835 		return;
1836 	}
1837 
1838 	if (lksettings.base.speed == SPEED_UNKNOWN)
1839 		tracker->bond_speed_mbps = 0;
1840 	else
1841 		tracker->bond_speed_mbps = lksettings.base.speed;
1842 }
1843 
1844 /* Returns speed in Mbps. */
mlx5_lag_query_bond_speed(struct mlx5_core_dev * mdev,u32 * speed)1845 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
1846 {
1847 	struct mlx5_lag *ldev;
1848 	unsigned long flags;
1849 	int ret = 0;
1850 
1851 	spin_lock_irqsave(&lag_lock, flags);
1852 	ldev = mlx5_lag_dev(mdev);
1853 	if (!ldev) {
1854 		ret = -ENODEV;
1855 		goto unlock;
1856 	}
1857 
1858 	*speed = ldev->tracker.bond_speed_mbps;
1859 
1860 	if (*speed == SPEED_UNKNOWN) {
1861 		mlx5_core_dbg(mdev, "Bond speed is unknown\n");
1862 		ret = -EINVAL;
1863 	}
1864 
1865 unlock:
1866 	spin_unlock_irqrestore(&lag_lock, flags);
1867 	return ret;
1868 }
1869 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
1870 
1871 /* this handler is always registered to netdev events */
mlx5_lag_netdev_event(struct notifier_block * this,unsigned long event,void * ptr)1872 static int mlx5_lag_netdev_event(struct notifier_block *this,
1873 				 unsigned long event, void *ptr)
1874 {
1875 	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1876 	struct lag_tracker tracker;
1877 	struct mlx5_lag *ldev;
1878 	int changed = 0;
1879 
1880 	if (event != NETDEV_CHANGEUPPER &&
1881 	    event != NETDEV_CHANGELOWERSTATE &&
1882 	    event != NETDEV_CHANGEINFODATA)
1883 		return NOTIFY_DONE;
1884 
1885 	ldev    = container_of(this, struct mlx5_lag, nb);
1886 
1887 	tracker = ldev->tracker;
1888 
1889 	switch (event) {
1890 	case NETDEV_CHANGEUPPER:
1891 		changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1892 		break;
1893 	case NETDEV_CHANGELOWERSTATE:
1894 		changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1895 							     ndev, ptr);
1896 		break;
1897 	case NETDEV_CHANGEINFODATA:
1898 		changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1899 		break;
1900 	}
1901 
1902 	if (changed)
1903 		mlx5_lag_update_tracker_speed(&tracker, ndev);
1904 
1905 	ldev->tracker = tracker;
1906 
1907 	if (changed)
1908 		mlx5_queue_bond_work(ldev, 0);
1909 
1910 	return NOTIFY_DONE;
1911 }
1912 
mlx5_ldev_add_netdev(struct mlx5_lag * ldev,struct mlx5_core_dev * dev,struct net_device * netdev)1913 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1914 				struct mlx5_core_dev *dev,
1915 				struct net_device *netdev)
1916 {
1917 	struct lag_func *pf;
1918 	unsigned long flags;
1919 	int i;
1920 
1921 	spin_lock_irqsave(&lag_lock, flags);
1922 	/* Find pf entry by matching dev pointer */
1923 	mlx5_ldev_for_each(i, 0, ldev) {
1924 		pf = mlx5_lag_pf(ldev, i);
1925 		if (pf->dev == dev) {
1926 			pf->netdev = netdev;
1927 			ldev->tracker.netdev_state[i].link_up = 0;
1928 			ldev->tracker.netdev_state[i].tx_enabled = 0;
1929 			break;
1930 		}
1931 	}
1932 	spin_unlock_irqrestore(&lag_lock, flags);
1933 }
1934 
mlx5_ldev_remove_netdev(struct mlx5_lag * ldev,struct net_device * netdev)1935 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1936 				    struct net_device *netdev)
1937 {
1938 	struct lag_func *pf;
1939 	unsigned long flags;
1940 	int i;
1941 
1942 	spin_lock_irqsave(&lag_lock, flags);
1943 	mlx5_ldev_for_each(i, 0, ldev) {
1944 		pf = mlx5_lag_pf(ldev, i);
1945 		if (pf->netdev == netdev) {
1946 			pf->netdev = NULL;
1947 			break;
1948 		}
1949 	}
1950 	spin_unlock_irqrestore(&lag_lock, flags);
1951 }
1952 
mlx5_ldev_add_mdev(struct mlx5_lag * ldev,struct mlx5_core_dev * dev)1953 static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1954 			      struct mlx5_core_dev *dev)
1955 {
1956 	struct lag_func *pf;
1957 	u32 idx;
1958 	int err;
1959 
1960 	pf = kzalloc_obj(*pf);
1961 	if (!pf)
1962 		return -ENOMEM;
1963 
1964 	err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
1965 		       GFP_KERNEL);
1966 	if (err) {
1967 		kfree(pf);
1968 		return err;
1969 	}
1970 
1971 	pf->idx = idx;
1972 	pf->dev = dev;
1973 	dev->priv.lag = ldev;
1974 
1975 	MLX5_NB_INIT(&pf->port_change_nb,
1976 		     mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
1977 	mlx5_eq_notifier_register(dev, &pf->port_change_nb);
1978 
1979 	return 0;
1980 }
1981 
mlx5_ldev_remove_mdev(struct mlx5_lag * ldev,struct mlx5_core_dev * dev)1982 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1983 				  struct mlx5_core_dev *dev)
1984 {
1985 	struct lag_func *pf;
1986 	int i;
1987 
1988 	mlx5_ldev_for_each(i, 0, ldev) {
1989 		pf = mlx5_lag_pf(ldev, i);
1990 		if (pf->dev == dev)
1991 			break;
1992 	}
1993 	if (i >= MLX5_MAX_PORTS)
1994 		return;
1995 
1996 	if (pf->port_change_nb.nb.notifier_call)
1997 		mlx5_eq_notifier_unregister(dev, &pf->port_change_nb);
1998 
1999 	pf->dev = NULL;
2000 	dev->priv.lag = NULL;
2001 	xa_erase(&ldev->pfs, pf->idx);
2002 	kfree(pf);
2003 }
2004 
2005 /* Must be called with HCA devcom component lock held */
__mlx5_lag_dev_add_mdev(struct mlx5_core_dev * dev)2006 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
2007 {
2008 	struct mlx5_devcom_comp_dev *pos = NULL;
2009 	struct mlx5_lag *ldev = NULL;
2010 	struct mlx5_core_dev *tmp_dev;
2011 	int err;
2012 
2013 	tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
2014 	if (tmp_dev)
2015 		ldev = mlx5_lag_dev(tmp_dev);
2016 
2017 	if (!ldev) {
2018 		ldev = mlx5_lag_dev_alloc(dev);
2019 		if (!ldev) {
2020 			mlx5_core_err(dev, "Failed to alloc lag dev\n");
2021 			return 0;
2022 		}
2023 		err = mlx5_ldev_add_mdev(ldev, dev);
2024 		if (err) {
2025 			mlx5_core_err(dev, "Failed to add mdev to lag dev\n");
2026 			mlx5_ldev_put(ldev);
2027 			return 0;
2028 		}
2029 		return 0;
2030 	}
2031 
2032 	mutex_lock(&ldev->lock);
2033 	if (ldev->mode_changes_in_progress) {
2034 		mutex_unlock(&ldev->lock);
2035 		return -EAGAIN;
2036 	}
2037 	mlx5_ldev_get(ldev);
2038 	err = mlx5_ldev_add_mdev(ldev, dev);
2039 	if (err) {
2040 		mlx5_ldev_put(ldev);
2041 		mutex_unlock(&ldev->lock);
2042 		return err;
2043 	}
2044 	mutex_unlock(&ldev->lock);
2045 
2046 	return 0;
2047 }
2048 
mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev * dev)2049 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
2050 {
2051 	mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
2052 	dev->priv.hca_devcom_comp = NULL;
2053 }
2054 
mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev * dev)2055 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
2056 {
2057 	struct mlx5_devcom_match_attr attr = {
2058 		.flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
2059 		.net = mlx5_core_net(dev),
2060 	};
2061 	u8 len __always_unused;
2062 
2063 	mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
2064 
2065 	/* This component is use to sync adding core_dev to lag_dev and to sync
2066 	 * changes of mlx5_adev_devices between LAG layer and other layers.
2067 	 */
2068 	dev->priv.hca_devcom_comp =
2069 		mlx5_devcom_register_component(dev->priv.devc,
2070 					       MLX5_DEVCOM_HCA_PORTS,
2071 					       &attr, mlx5_lag_devcom_event,
2072 					       dev);
2073 	if (!dev->priv.hca_devcom_comp) {
2074 		mlx5_core_err(dev,
2075 			      "Failed to register devcom HCA component.");
2076 		return -EINVAL;
2077 	}
2078 
2079 	return 0;
2080 }
2081 
mlx5_lag_remove_mdev(struct mlx5_core_dev * dev)2082 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
2083 {
2084 	struct mlx5_lag *ldev;
2085 
2086 	ldev = mlx5_lag_dev(dev);
2087 	if (!ldev)
2088 		return;
2089 
2090 	/* mdev is being removed, might as well remove debugfs
2091 	 * as early as possible.
2092 	 */
2093 	mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
2094 recheck:
2095 	mutex_lock(&ldev->lock);
2096 	if (ldev->mode_changes_in_progress) {
2097 		mutex_unlock(&ldev->lock);
2098 		msleep(100);
2099 		goto recheck;
2100 	}
2101 	mlx5_ldev_remove_mdev(ldev, dev);
2102 	mutex_unlock(&ldev->lock);
2103 	/* Send devcom event to notify peers that a device is being removed */
2104 	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2105 			       LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
2106 	mlx5_lag_unregister_hca_devcom_comp(dev);
2107 	mlx5_ldev_put(ldev);
2108 }
2109 
mlx5_lag_add_mdev(struct mlx5_core_dev * dev)2110 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
2111 {
2112 	int err;
2113 
2114 	if (!mlx5_lag_is_supported(dev))
2115 		return;
2116 
2117 	if (mlx5_lag_register_hca_devcom_comp(dev))
2118 		return;
2119 
2120 recheck:
2121 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2122 	err = __mlx5_lag_dev_add_mdev(dev);
2123 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2124 
2125 	if (err) {
2126 		msleep(100);
2127 		goto recheck;
2128 	}
2129 	/* Send devcom event to notify peers that a device was added */
2130 	mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2131 			       LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
2132 	mlx5_ldev_add_debugfs(dev);
2133 }
2134 
mlx5_lag_remove_netdev(struct mlx5_core_dev * dev,struct net_device * netdev)2135 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
2136 			    struct net_device *netdev)
2137 {
2138 	struct mlx5_lag *ldev;
2139 	bool lag_is_active;
2140 
2141 	ldev = mlx5_lag_dev(dev);
2142 	if (!ldev)
2143 		return;
2144 
2145 	mutex_lock(&ldev->lock);
2146 	mlx5_ldev_remove_netdev(ldev, netdev);
2147 	clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2148 
2149 	lag_is_active = __mlx5_lag_is_active(ldev);
2150 	mutex_unlock(&ldev->lock);
2151 
2152 	if (lag_is_active)
2153 		mlx5_queue_bond_work(ldev, 0);
2154 }
2155 
mlx5_lag_add_netdev(struct mlx5_core_dev * dev,struct net_device * netdev)2156 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
2157 			 struct net_device *netdev)
2158 {
2159 	struct mlx5_lag *ldev;
2160 	int num = 0;
2161 
2162 	ldev = mlx5_lag_dev(dev);
2163 	if (!ldev)
2164 		return;
2165 
2166 	mutex_lock(&ldev->lock);
2167 	mlx5_ldev_add_netdev(ldev, dev, netdev);
2168 	num = mlx5_lag_num_netdevs(ldev);
2169 	if (num >= ldev->ports)
2170 		set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2171 	mutex_unlock(&ldev->lock);
2172 	mlx5_queue_bond_work(ldev, 0);
2173 }
2174 
mlx5_get_pre_ldev_func(struct mlx5_lag * ldev,int start_idx,int end_idx)2175 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
2176 {
2177 	struct lag_func *pf;
2178 	int i;
2179 
2180 	for (i = start_idx; i >= end_idx; i--) {
2181 		pf = xa_load(&ldev->pfs, i);
2182 		if (pf && pf->dev)
2183 			return i;
2184 	}
2185 	return -1;
2186 }
2187 
mlx5_get_next_ldev_func(struct mlx5_lag * ldev,int start_idx)2188 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
2189 {
2190 	struct lag_func *pf;
2191 	unsigned long idx;
2192 
2193 	xa_for_each_start(&ldev->pfs, idx, pf, start_idx)
2194 		if (pf->dev)
2195 			return idx;
2196 	return MLX5_MAX_PORTS;
2197 }
2198 
mlx5_lag_is_roce(struct mlx5_core_dev * dev)2199 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
2200 {
2201 	struct mlx5_lag *ldev;
2202 	unsigned long flags;
2203 	bool res;
2204 
2205 	spin_lock_irqsave(&lag_lock, flags);
2206 	ldev = mlx5_lag_dev(dev);
2207 	res  = ldev && __mlx5_lag_is_roce(ldev);
2208 	spin_unlock_irqrestore(&lag_lock, flags);
2209 
2210 	return res;
2211 }
2212 EXPORT_SYMBOL(mlx5_lag_is_roce);
2213 
mlx5_lag_is_active(struct mlx5_core_dev * dev)2214 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
2215 {
2216 	struct mlx5_lag *ldev;
2217 	unsigned long flags;
2218 	bool res;
2219 
2220 	spin_lock_irqsave(&lag_lock, flags);
2221 	ldev = mlx5_lag_dev(dev);
2222 	res  = ldev && __mlx5_lag_is_active(ldev);
2223 	spin_unlock_irqrestore(&lag_lock, flags);
2224 
2225 	return res;
2226 }
2227 EXPORT_SYMBOL(mlx5_lag_is_active);
2228 
mlx5_lag_mode_is_hash(struct mlx5_core_dev * dev)2229 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
2230 {
2231 	struct mlx5_lag *ldev;
2232 	unsigned long flags;
2233 	bool res = 0;
2234 
2235 	spin_lock_irqsave(&lag_lock, flags);
2236 	ldev = mlx5_lag_dev(dev);
2237 	if (ldev)
2238 		res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
2239 	spin_unlock_irqrestore(&lag_lock, flags);
2240 
2241 	return res;
2242 }
2243 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
2244 
mlx5_lag_is_master(struct mlx5_core_dev * dev)2245 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
2246 {
2247 	struct mlx5_lag *ldev;
2248 	unsigned long flags;
2249 	struct lag_func *pf;
2250 	bool res = false;
2251 	int idx;
2252 
2253 	spin_lock_irqsave(&lag_lock, flags);
2254 	ldev = mlx5_lag_dev(dev);
2255 	idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
2256 	if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) {
2257 		pf = mlx5_lag_pf(ldev, idx);
2258 		res = pf && dev == pf->dev;
2259 	}
2260 	spin_unlock_irqrestore(&lag_lock, flags);
2261 
2262 	return res;
2263 }
2264 EXPORT_SYMBOL(mlx5_lag_is_master);
2265 
mlx5_lag_is_sriov(struct mlx5_core_dev * dev)2266 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
2267 {
2268 	struct mlx5_lag *ldev;
2269 	unsigned long flags;
2270 	bool res;
2271 
2272 	spin_lock_irqsave(&lag_lock, flags);
2273 	ldev = mlx5_lag_dev(dev);
2274 	res  = ldev && __mlx5_lag_is_sriov(ldev);
2275 	spin_unlock_irqrestore(&lag_lock, flags);
2276 
2277 	return res;
2278 }
2279 EXPORT_SYMBOL(mlx5_lag_is_sriov);
2280 
mlx5_lag_is_shared_fdb(struct mlx5_core_dev * dev)2281 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
2282 {
2283 	struct mlx5_lag *ldev;
2284 	unsigned long flags;
2285 	bool res;
2286 
2287 	spin_lock_irqsave(&lag_lock, flags);
2288 	ldev = mlx5_lag_dev(dev);
2289 	res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
2290 	spin_unlock_irqrestore(&lag_lock, flags);
2291 
2292 	return res;
2293 }
2294 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
2295 
mlx5_lag_disable_change(struct mlx5_core_dev * dev)2296 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
2297 {
2298 	struct mlx5_lag *ldev;
2299 
2300 	ldev = mlx5_lag_dev(dev);
2301 	if (!ldev)
2302 		return;
2303 
2304 	mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2305 	mutex_lock(&ldev->lock);
2306 
2307 	ldev->mode_changes_in_progress++;
2308 	if (__mlx5_lag_is_active(ldev)) {
2309 		if (ldev->mode == MLX5_LAG_MODE_MPESW)
2310 			mlx5_lag_disable_mpesw(ldev);
2311 		else
2312 			mlx5_disable_lag(ldev);
2313 	}
2314 
2315 	mutex_unlock(&ldev->lock);
2316 	mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2317 }
2318 
mlx5_lag_enable_change(struct mlx5_core_dev * dev)2319 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
2320 {
2321 	struct mlx5_lag *ldev;
2322 
2323 	ldev = mlx5_lag_dev(dev);
2324 	if (!ldev)
2325 		return;
2326 
2327 	mutex_lock(&ldev->lock);
2328 	ldev->mode_changes_in_progress--;
2329 	mutex_unlock(&ldev->lock);
2330 	mlx5_queue_bond_work(ldev, 0);
2331 }
2332 
mlx5_lag_get_slave_port(struct mlx5_core_dev * dev,struct net_device * slave)2333 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
2334 			   struct net_device *slave)
2335 {
2336 	struct mlx5_lag *ldev;
2337 	unsigned long flags;
2338 	struct lag_func *pf;
2339 	u8 port = 0;
2340 	int i;
2341 
2342 	spin_lock_irqsave(&lag_lock, flags);
2343 	ldev = mlx5_lag_dev(dev);
2344 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
2345 		goto unlock;
2346 
2347 	mlx5_ldev_for_each(i, 0, ldev) {
2348 		pf = mlx5_lag_pf(ldev, i);
2349 		if (pf->netdev == slave) {
2350 			port = i;
2351 			break;
2352 		}
2353 	}
2354 
2355 	port = ldev->v2p_map[port * ldev->buckets];
2356 
2357 unlock:
2358 	spin_unlock_irqrestore(&lag_lock, flags);
2359 	return port;
2360 }
2361 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
2362 
mlx5_lag_get_num_ports(struct mlx5_core_dev * dev)2363 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
2364 {
2365 	struct mlx5_lag *ldev;
2366 
2367 	ldev = mlx5_lag_dev(dev);
2368 	if (!ldev)
2369 		return 0;
2370 
2371 	return ldev->ports;
2372 }
2373 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
2374 
mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev * dev,int * i)2375 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
2376 {
2377 	struct mlx5_core_dev *peer_dev = NULL;
2378 	struct mlx5_lag *ldev;
2379 	unsigned long flags;
2380 	struct lag_func *pf;
2381 	int idx;
2382 
2383 	spin_lock_irqsave(&lag_lock, flags);
2384 	ldev = mlx5_lag_dev(dev);
2385 	if (!ldev)
2386 		goto unlock;
2387 
2388 	if (*i == MLX5_MAX_PORTS)
2389 		goto unlock;
2390 	mlx5_ldev_for_each(idx, *i, ldev) {
2391 		pf = mlx5_lag_pf(ldev, idx);
2392 		if (pf->dev != dev)
2393 			break;
2394 	}
2395 
2396 	if (idx == MLX5_MAX_PORTS) {
2397 		*i = idx;
2398 		goto unlock;
2399 	}
2400 	*i = idx + 1;
2401 
2402 	pf = mlx5_lag_pf(ldev, idx);
2403 	peer_dev = pf->dev;
2404 
2405 unlock:
2406 	spin_unlock_irqrestore(&lag_lock, flags);
2407 	return peer_dev;
2408 }
2409 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
2410 
/*
 * Sum congestion statistics counters over the devices of a lag.
 *
 * @dev:          device the query is issued for
 * @values:       output array of @num_counters entries; zeroed first, then
 *                each entry accumulates the counter of every queried device
 * @num_counters: number of entries in @values and @offsets
 * @offsets:      byte offset of each counter inside the
 *                query_cong_statistics_out mailbox
 *
 * When @dev belongs to an active lag device, every populated lag function
 * (at most ldev->ports of them) is queried; otherwise only @dev is. The
 * device list is snapshotted under lag_lock and the commands are executed
 * after the lock is dropped.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, or the error of the
 * first failing QUERY_CONG_STATISTICS command (@values then holds the
 * partial sums accumulated so far).
 */
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	int ret = 0, i, j;
	struct mlx5_lag *ldev;
	unsigned long flags;
	struct lag_func *pf;
	int num_devs = 0;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	mdev = kvcalloc(MLX5_MAX_PORTS, sizeof(*mdev), GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		mlx5_ldev_for_each(i, 0, ldev) {
			if (num_devs >= ldev->ports)
				break;
			pf = mlx5_lag_pf(ldev, i);
			mdev[num_devs++] = pf->dev;
		}
	} else {
		mdev[num_devs++] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	/* Walk only the devices actually collected above: looping up to
	 * ldev->ports could hit a NULL slot when fewer functions are
	 * populated.
	 */
	for (i = 0; i < num_devs; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
2472