1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/mlx5.h"
39 #include "lib/devcom.h"
40 #include "mlx5_core.h"
41 #include "eswitch.h"
42 #include "esw/acl/ofld.h"
43 #include "lag.h"
44 #include "mp.h"
45 #include "mpesw.h"
46
47
48 /* General purpose, use for short periods of time.
49 * Beware of lock dependencies (preferably, no locks should be acquired
50 * under it).
51 */
52 static DEFINE_SPINLOCK(lag_lock);
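/* Typical usage in this file: take lag_lock with spin_lock_irqsave() around a
 * short mlx5_lag_dev() lookup plus a few field reads, and drop it before
 * calling anything that might sleep, e.g.:
 *
 *	spin_lock_irqsave(&lag_lock, flags);
 *	ldev = mlx5_lag_dev(dev);
 *	res = ldev && __mlx5_lag_is_active(ldev);
 *	spin_unlock_irqrestore(&lag_lock, flags);
 *
 * (the pattern used by the mlx5_lag_is_*() query helpers below).
 */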
53
54 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
55 {
56 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
57 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
58
59 if (mode == MLX5_LAG_MODE_MPESW)
60 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
61
62 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
63 }
64
65 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
66 {
67 u8 enabled_ports[MLX5_MAX_PORTS] = {};
68 u8 active_port = 0;
69 int num_enabled;
70 int idx;
71
72 mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
73 &num_enabled);
74 for (idx = 0; idx < num_enabled; idx++)
75 active_port |= BIT_MASK(enabled_ports[idx]);
76
77 return active_port;
78 }
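/* Example for the helper above: with ports 0 and 2 TX-enabled and up, the
 * returned mask is BIT_MASK(0) | BIT_MASK(2) == 0x5 (port indices are well
 * below BITS_PER_LONG here, so BIT_MASK(n) is effectively 1UL << n).
 */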
79
80 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
81 int mode, unsigned long flags)
82 {
83 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
84 &flags);
85 int port_sel_mode = get_port_sel_mode(mode, flags);
86 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
87 u8 *ports = ldev->v2p_map;
88 int idx0, idx1;
89 void *lag_ctx;
90
91 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
92 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
93 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
94 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
95 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
96
97 if (idx0 < 0 || idx1 < 0)
98 return -EINVAL;
99
100 switch (port_sel_mode) {
101 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
102 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
103 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
104 break;
105 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
106 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
107 break;
108
109 MLX5_SET(lagc, lag_ctx, active_port,
110 lag_active_port_bits(mlx5_lag_dev(dev)));
111 break;
112 default:
113 break;
114 }
115 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
116
117 return mlx5_cmd_exec_in(dev, create_lag, in);
118 }
119
120 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
121 u8 *ports)
122 {
123 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
124 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
125 int idx0, idx1;
126
127 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
128 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
129 if (idx0 < 0 || idx1 < 0)
130 return -EINVAL;
131
132 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
133 MLX5_SET(modify_lag_in, in, field_select, 0x1);
134
135 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
136 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
137
138 return mlx5_cmd_exec_in(dev, modify_lag, in);
139 }
140
141 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
142 {
143 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
144
145 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
146
147 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
148 }
149 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
150
151 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
152 {
153 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
154
155 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
156
157 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
158 }
159 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
160
161 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
162 u8 *ports, int *num_disabled)
163 {
164 int i;
165
166 *num_disabled = 0;
167 mlx5_ldev_for_each(i, 0, ldev)
168 if (!tracker->netdev_state[i].tx_enabled ||
169 !tracker->netdev_state[i].link_up)
170 ports[(*num_disabled)++] = i;
171 }
172
173 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
174 u8 *ports, int *num_enabled)
175 {
176 int i;
177
178 *num_enabled = 0;
179 mlx5_ldev_for_each(i, 0, ldev)
180 if (tracker->netdev_state[i].tx_enabled &&
181 tracker->netdev_state[i].link_up)
182 ports[(*num_enabled)++] = i;
183
184 if (*num_enabled == 0)
185 mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
186 }
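/* Note on the helper above: when no port is both tx_enabled and link_up,
 * mlx5_infer_tx_enabled() falls back to the "disabled" set, so callers are
 * never handed an empty port list for an all-down bond.
 */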
187
188 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
189 struct mlx5_lag *ldev,
190 struct lag_tracker *tracker,
191 unsigned long flags)
192 {
193 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
194 u8 enabled_ports[MLX5_MAX_PORTS] = {};
195 int written = 0;
196 int num_enabled;
197 int idx;
198 int err;
199 int i;
200 int j;
201
202 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
203 mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
204 &num_enabled);
205 for (i = 0; i < num_enabled; i++) {
206 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
207 if (err != 3)
208 return;
209 written += err;
210 }
211 buf[written - 2] = 0;
212 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
213 } else {
214 mlx5_ldev_for_each(i, 0, ldev) {
215 for (j = 0; j < ldev->buckets; j++) {
216 idx = i * ldev->buckets + j;
217 err = scnprintf(buf + written, 10,
218 " port %d:%d", i + 1, ldev->v2p_map[idx]);
219 if (err != 9)
220 return;
221 written += err;
222 }
223 }
224 mlx5_core_info(dev, "lag map:%s\n", buf);
225 }
226 }
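/* Illustrative output of the function above for a two-port device: in hash
 * (port selection FT) mode the log line reads "lag map active ports: 1, 2"
 * (1-based port numbers), while in queue-affinity mode it reads
 * "lag map: port 1:1 port 2:2", one entry per bucket.
 */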
227
228 static int mlx5_lag_netdev_event(struct notifier_block *this,
229 unsigned long event, void *ptr);
230 static void mlx5_do_bond_work(struct work_struct *work);
231
232 static void mlx5_ldev_free(struct kref *ref)
233 {
234 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
235 struct net *net;
236 int i;
237
238 if (ldev->nb.notifier_call) {
239 net = read_pnet(&ldev->net);
240 unregister_netdevice_notifier_net(net, &ldev->nb);
241 }
242
243 mlx5_ldev_for_each(i, 0, ldev) {
244 if (ldev->pf[i].dev &&
245 ldev->pf[i].port_change_nb.nb.notifier_call) {
246 struct mlx5_nb *nb = &ldev->pf[i].port_change_nb;
247
248 mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb);
249 }
250 }
251
252 mlx5_lag_mp_cleanup(ldev);
253 cancel_delayed_work_sync(&ldev->bond_work);
254 cancel_work_sync(&ldev->speed_update_work);
255 destroy_workqueue(ldev->wq);
256 mutex_destroy(&ldev->lock);
257 kfree(ldev);
258 }
259
260 static void mlx5_ldev_put(struct mlx5_lag *ldev)
261 {
262 kref_put(&ldev->ref, mlx5_ldev_free);
263 }
264
265 static void mlx5_ldev_get(struct mlx5_lag *ldev)
266 {
267 kref_get(&ldev->ref);
268 }
269
270 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
271 {
272 struct mlx5_lag *ldev;
273 int err;
274
275 ldev = kzalloc_obj(*ldev);
276 if (!ldev)
277 return NULL;
278
279 ldev->wq = create_singlethread_workqueue("mlx5_lag");
280 if (!ldev->wq) {
281 kfree(ldev);
282 return NULL;
283 }
284
285 kref_init(&ldev->ref);
286 mutex_init(&ldev->lock);
287 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
288 INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
289
290 ldev->nb.notifier_call = mlx5_lag_netdev_event;
291 write_pnet(&ldev->net, mlx5_core_net(dev));
292 if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
293 ldev->nb.notifier_call = NULL;
294 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
295 }
296 ldev->mode = MLX5_LAG_MODE_NONE;
297
298 err = mlx5_lag_mp_init(ldev);
299 if (err)
300 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
301 err);
302
303 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
304 ldev->buckets = 1;
305
306 return ldev;
307 }
308
309 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
310 struct net_device *ndev)
311 {
312 int i;
313
314 mlx5_ldev_for_each(i, 0, ldev)
315 if (ldev->pf[i].netdev == ndev)
316 return i;
317
318 return -ENOENT;
319 }
320
321 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
322 {
323 int i, num = 0;
324
325 if (!ldev)
326 return -ENOENT;
327
328 mlx5_ldev_for_each(i, 0, ldev) {
329 if (num == seq)
330 return i;
331 num++;
332 }
333 return -ENOENT;
334 }
335
336 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
337 {
338 int i, num = 0;
339
340 if (!ldev)
341 return 0;
342
343 mlx5_ldev_for_each(i, 0, ldev) {
344 (void)i;
345 num++;
346 }
347 return num;
348 }
349
350 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
351 {
352 int i, num = 0;
353
354 if (!ldev)
355 return 0;
356
357 mlx5_ldev_for_each(i, 0, ldev)
358 if (ldev->pf[i].netdev)
359 num++;
360 return num;
361 }
362
363 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
364 {
365 return ldev->mode == MLX5_LAG_MODE_ROCE;
366 }
367
368 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
369 {
370 return ldev->mode == MLX5_LAG_MODE_SRIOV;
371 }
372
373 /* Create a mapping between steering slots and active ports.
374 * As we have ldev->buckets slots per port, first assume the native
375 * mapping should be used.
376 * If there are disabled ports, fill the relevant slots with a
377 * mapping that points to active ports.
378 */
379 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
380 struct mlx5_lag *ldev,
381 u8 buckets,
382 u8 *ports)
383 {
384 int disabled[MLX5_MAX_PORTS] = {};
385 int enabled[MLX5_MAX_PORTS] = {};
386 int disabled_ports_num = 0;
387 int enabled_ports_num = 0;
388 int idx;
389 u32 rand;
390 int i;
391 int j;
392
393 mlx5_ldev_for_each(i, 0, ldev) {
394 if (tracker->netdev_state[i].tx_enabled &&
395 tracker->netdev_state[i].link_up)
396 enabled[enabled_ports_num++] = i;
397 else
398 disabled[disabled_ports_num++] = i;
399 }
400
401 /* Use the native mapping by default, where each port's buckets
402 * point to the native port: 1 1 1 ... 1 2 2 2 ... 2 3 3 3 ... 3 etc.
403 */
404 mlx5_ldev_for_each(i, 0, ldev) {
405 for (j = 0; j < buckets; j++) {
406 idx = i * buckets + j;
407 ports[idx] = i + 1;
408 }
409 }
410
411 /* If all ports are disabled/enabled keep native mapping */
412 if (enabled_ports_num == ldev->ports ||
413 disabled_ports_num == ldev->ports)
414 return;
415
416 /* Go over the disabled ports and for each assign a random active port */
417 for (i = 0; i < disabled_ports_num; i++) {
418 for (j = 0; j < buckets; j++) {
419 get_random_bytes(&rand, 4);
420 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
421 }
422 }
423 }
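/* Worked example for the function above (2 ports, 1 bucket each): with port 0
 * up and tx-enabled but port 1 down, the native mapping {1, 2} becomes
 * {1, 1}, since each bucket of the disabled port is pointed at a randomly
 * chosen enabled port; when all ports are up (or all down) the native
 * mapping {1, 2} is kept as-is.
 */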
424
425 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
426 {
427 int i;
428
429 mlx5_ldev_for_each(i, 0, ldev)
430 if (ldev->pf[i].has_drop)
431 return true;
432 return false;
433 }
434
435 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
436 {
437 int i;
438
439 mlx5_ldev_for_each(i, 0, ldev) {
440 if (!ldev->pf[i].has_drop)
441 continue;
442
443 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
444 MLX5_VPORT_UPLINK);
445 ldev->pf[i].has_drop = false;
446 }
447 }
448
449 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
450 struct lag_tracker *tracker)
451 {
452 u8 disabled_ports[MLX5_MAX_PORTS] = {};
453 struct mlx5_core_dev *dev;
454 int disabled_index;
455 int num_disabled;
456 int err;
457 int i;
458
459 /* First delete the current drop rule so there won't be any dropped
460 * packets
461 */
462 mlx5_lag_drop_rule_cleanup(ldev);
463
464 if (!ldev->tracker.has_inactive)
465 return;
466
467 mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
468
469 for (i = 0; i < num_disabled; i++) {
470 disabled_index = disabled_ports[i];
471 dev = ldev->pf[disabled_index].dev;
472 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
473 MLX5_VPORT_UPLINK);
474 if (!err)
475 ldev->pf[disabled_index].has_drop = true;
476 else
477 mlx5_core_err(dev,
478 "Failed to create lag drop rule, error: %d", err);
479 }
480 }
481
482 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
483 {
484 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
485 void *lag_ctx;
486
487 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
488
489 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
490 MLX5_SET(modify_lag_in, in, field_select, 0x2);
491
492 MLX5_SET(lagc, lag_ctx, active_port, ports);
493
494 return mlx5_cmd_exec_in(dev, modify_lag, in);
495 }
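/* The MODIFY_LAG field_select values used in this file appear to be a bitmask
 * of which lag context fields the command updates: 0x1 for the
 * tx_remap_affinity_* fields (mlx5_cmd_modify_lag() above) and 0x2 for
 * active_port (this helper). This reading is inferred from the two callers
 * here, not from the PRM.
 */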
496
497 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
498 {
499 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
500 struct mlx5_core_dev *dev0;
501 u8 active_ports;
502 int ret;
503
504 if (idx < 0)
505 return -EINVAL;
506
507 dev0 = ldev->pf[idx].dev;
508 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
509 ret = mlx5_lag_port_sel_modify(ldev, ports);
510 if (ret ||
511 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
512 return ret;
513
514 active_ports = lag_active_port_bits(ldev);
515
516 return mlx5_cmd_modify_active_port(dev0, active_ports);
517 }
518 return mlx5_cmd_modify_lag(dev0, ldev, ports);
519 }
520
521 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
522 {
523 struct net_device *ndev = NULL;
524 struct mlx5_lag *ldev;
525 unsigned long flags;
526 int i, last_idx;
527
528 spin_lock_irqsave(&lag_lock, flags);
529 ldev = mlx5_lag_dev(dev);
530
531 if (!ldev)
532 goto unlock;
533
534 mlx5_ldev_for_each(i, 0, ldev)
535 if (ldev->tracker.netdev_state[i].tx_enabled)
536 ndev = ldev->pf[i].netdev;
537 if (!ndev) {
538 last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
539 if (last_idx < 0)
540 goto unlock;
541 ndev = ldev->pf[last_idx].netdev;
542 }
543
544 dev_hold(ndev);
545
546 unlock:
547 spin_unlock_irqrestore(&lag_lock, flags);
548
549 return ndev;
550 }
551
552 void mlx5_modify_lag(struct mlx5_lag *ldev,
553 struct lag_tracker *tracker)
554 {
555 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
556 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
557 struct mlx5_core_dev *dev0;
558 int idx;
559 int err;
560 int i;
561 int j;
562
563 if (first_idx < 0)
564 return;
565
566 dev0 = ldev->pf[first_idx].dev;
567 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
568
569 mlx5_ldev_for_each(i, 0, ldev) {
570 for (j = 0; j < ldev->buckets; j++) {
571 idx = i * ldev->buckets + j;
572 if (ports[idx] == ldev->v2p_map[idx])
573 continue;
574 err = _mlx5_modify_lag(ldev, ports);
575 if (err) {
576 mlx5_core_err(dev0,
577 "Failed to modify LAG (%d)\n",
578 err);
579 return;
580 }
581 memcpy(ldev->v2p_map, ports, sizeof(ports));
582
583 mlx5_lag_print_mapping(dev0, ldev, tracker,
584 ldev->mode_flags);
585 break;
586 }
587 }
588
589 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
590 struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
591
592 if (ldev->mode != MLX5_LAG_MODE_ROCE)
593 mlx5_lag_drop_rule_setup(ldev, tracker);
594 /* Only SR-IOV and RoCE LAG should have tracker->tx_type set,
595 * so there is no need to check the mode.
596 */
597 blocking_notifier_call_chain(&dev0->priv.lag_nh,
598 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
599 ndev);
600 dev_put(ndev);
601 }
602 }
603
604 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
605 enum mlx5_lag_mode mode,
606 unsigned long *flags)
607 {
608 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
609 struct mlx5_core_dev *dev0;
610
611 if (first_idx < 0)
612 return -EINVAL;
613
614 if (mode == MLX5_LAG_MODE_MPESW ||
615 mode == MLX5_LAG_MODE_MULTIPATH)
616 return 0;
617
618 dev0 = ldev->pf[first_idx].dev;
619
620 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
621 if (ldev->ports > 2)
622 return -EINVAL;
623 return 0;
624 }
625
626 if (ldev->ports > 2)
627 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
628
629 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
630
631 return 0;
632 }
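/* Summary of the selection above: for the modes that reach this point (MPESW
 * and multipath return early), hash-based (port selection flow table) mode is
 * chosen whenever the port_select_flow_table capability exists; with more
 * than two ports each port additionally gets MLX5_LAG_MAX_HASH_BUCKETS
 * steering slots instead of one, which is what lets
 * mlx5_infer_tx_affinity_mapping() spread a disabled port's traffic across
 * several active ports.
 */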
633
634 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
635 struct lag_tracker *tracker, bool shared_fdb,
636 unsigned long *flags)
637 {
638 *flags = 0;
639 if (shared_fdb) {
640 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
641 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
642 }
643
644 if (mode == MLX5_LAG_MODE_MPESW)
645 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
646
647 return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
648 }
649
650 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
651 {
652 int port_sel_mode = get_port_sel_mode(mode, flags);
653
654 switch (port_sel_mode) {
655 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
656 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
657 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
658 default: return "invalid";
659 }
660 }
661
662 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
663 {
664 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
665 struct mlx5_eswitch *master_esw;
666 struct mlx5_core_dev *dev0;
667 int i, j;
668 int err;
669
670 if (first_idx < 0)
671 return -EINVAL;
672
673 dev0 = ldev->pf[first_idx].dev;
674 master_esw = dev0->priv.eswitch;
675 mlx5_ldev_for_each(i, first_idx + 1, ldev) {
676 struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
677
678 err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
679 slave_esw, ldev->ports);
680 if (err)
681 goto err;
682 }
683 return 0;
684 err:
685 mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
686 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
687 ldev->pf[j].dev->priv.eswitch);
688 return err;
689 }
690
691 static int mlx5_create_lag(struct mlx5_lag *ldev,
692 struct lag_tracker *tracker,
693 enum mlx5_lag_mode mode,
694 unsigned long flags)
695 {
696 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
697 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
698 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
699 struct mlx5_core_dev *dev0;
700 int err;
701
702 if (first_idx < 0)
703 return -EINVAL;
704
705 dev0 = ldev->pf[first_idx].dev;
706 if (tracker)
707 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
708 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
709 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
710
711 err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
712 if (err) {
713 mlx5_core_err(dev0,
714 "Failed to create LAG (%d)\n",
715 err);
716 return err;
717 }
718
719 if (shared_fdb) {
720 err = mlx5_lag_create_single_fdb(ldev);
721 if (err)
722 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
723 else
724 mlx5_core_info(dev0, "Operation mode is single FDB\n");
725 }
726
727 if (err) {
728 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
729 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
730 mlx5_core_err(dev0,
731 "Failed to deactivate RoCE LAG; driver restart required\n");
732 }
733 BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
734
735 return err;
736 }
737
738 int mlx5_activate_lag(struct mlx5_lag *ldev,
739 struct lag_tracker *tracker,
740 enum mlx5_lag_mode mode,
741 bool shared_fdb)
742 {
743 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
744 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
745 struct mlx5_core_dev *dev0;
746 unsigned long flags = 0;
747 int err;
748
749 if (first_idx < 0)
750 return -EINVAL;
751
752 dev0 = ldev->pf[first_idx].dev;
753 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
754 if (err)
755 return err;
756
757 if (mode != MLX5_LAG_MODE_MPESW) {
758 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
759 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
760 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
761 ldev->v2p_map);
762 if (err) {
763 mlx5_core_err(dev0,
764 "Failed to create LAG port selection(%d)\n",
765 err);
766 return err;
767 }
768 }
769 }
770
771 err = mlx5_create_lag(ldev, tracker, mode, flags);
772 if (err) {
773 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
774 mlx5_lag_port_sel_destroy(ldev);
775 if (roce_lag)
776 mlx5_core_err(dev0,
777 "Failed to activate RoCE LAG\n");
778 else
779 mlx5_core_err(dev0,
780 "Failed to activate VF LAG\n"
781 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
782 return err;
783 }
784
785 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
786 !roce_lag)
787 mlx5_lag_drop_rule_setup(ldev, tracker);
788
789 ldev->mode = mode;
790 ldev->mode_flags = flags;
791 return 0;
792 }
793
794 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
795 {
796 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
797 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
798 bool roce_lag = __mlx5_lag_is_roce(ldev);
799 unsigned long flags = ldev->mode_flags;
800 struct mlx5_eswitch *master_esw;
801 struct mlx5_core_dev *dev0;
802 int err;
803 int i;
804
805 if (first_idx < 0)
806 return -EINVAL;
807
808 dev0 = ldev->pf[first_idx].dev;
809 master_esw = dev0->priv.eswitch;
810 ldev->mode = MLX5_LAG_MODE_NONE;
811 ldev->mode_flags = 0;
812 mlx5_lag_mp_reset(ldev);
813
814 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
815 mlx5_ldev_for_each(i, first_idx + 1, ldev)
816 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
817 ldev->pf[i].dev->priv.eswitch);
818 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
819 }
820
821 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
822 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
823 if (err) {
824 if (roce_lag) {
825 mlx5_core_err(dev0,
826 "Failed to deactivate RoCE LAG; driver restart required\n");
827 } else {
828 mlx5_core_err(dev0,
829 "Failed to deactivate VF LAG; driver restart required\n"
830 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
831 }
832 return err;
833 }
834
835 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
836 mlx5_lag_port_sel_destroy(ldev);
837 ldev->buckets = 1;
838 }
839 if (mlx5_lag_has_drop_rule(ldev))
840 mlx5_lag_drop_rule_cleanup(ldev);
841
842 return 0;
843 }
844
845 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
846 {
847 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
848 #ifdef CONFIG_MLX5_ESWITCH
849 struct mlx5_core_dev *dev;
850 u8 mode;
851 #endif
852 bool roce_support;
853 int i;
854
855 if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
856 return false;
857
858 #ifdef CONFIG_MLX5_ESWITCH
859 mlx5_ldev_for_each(i, 0, ldev) {
860 dev = ldev->pf[i].dev;
861 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
862 return false;
863 }
864
865 dev = ldev->pf[first_idx].dev;
866 mode = mlx5_eswitch_mode(dev);
867 mlx5_ldev_for_each(i, 0, ldev)
868 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
869 return false;
870
871 #else
872 mlx5_ldev_for_each(i, 0, ldev)
873 if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
874 return false;
875 #endif
876 roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
877 mlx5_ldev_for_each(i, first_idx + 1, ldev)
878 if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
879 return false;
880
881 return true;
882 }
883
884 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
885 {
886 int i;
887
888 mlx5_ldev_for_each(i, 0, ldev) {
889 if (ldev->pf[i].dev->priv.flags &
890 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
891 continue;
892
893 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
894 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
895 }
896 }
897
898 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
899 {
900 int i;
901
902 mlx5_ldev_for_each(i, 0, ldev) {
903 if (ldev->pf[i].dev->priv.flags &
904 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
905 continue;
906
907 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
908 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
909 }
910 }
911
912 void mlx5_disable_lag(struct mlx5_lag *ldev)
913 {
914 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
915 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
916 struct mlx5_core_dev *dev0;
917 bool roce_lag;
918 int err;
919 int i;
920
921 if (idx < 0)
922 return;
923
924 dev0 = ldev->pf[idx].dev;
925 roce_lag = __mlx5_lag_is_roce(ldev);
926
927 if (shared_fdb) {
928 mlx5_lag_remove_devices(ldev);
929 } else if (roce_lag) {
930 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
931 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
932 mlx5_rescan_drivers_locked(dev0);
933 }
934 mlx5_ldev_for_each(i, idx + 1, ldev)
935 mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
936 }
937
938 err = mlx5_deactivate_lag(ldev);
939 if (err)
940 return;
941
942 if (shared_fdb || roce_lag)
943 mlx5_lag_add_devices(ldev);
944
945 if (shared_fdb)
946 mlx5_ldev_for_each(i, 0, ldev)
947 if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
948 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
949 }
950
951 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
952 {
953 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
954 struct mlx5_core_dev *dev;
955 int i;
956
957 if (idx < 0)
958 return false;
959
960 mlx5_ldev_for_each(i, idx + 1, ldev) {
961 dev = ldev->pf[i].dev;
962 if (is_mdev_switchdev_mode(dev) &&
963 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
964 MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
965 MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
966 mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
967 MLX5_CAP_GEN(dev, num_lag_ports) - 1)
968 continue;
969 return false;
970 }
971
972 dev = ldev->pf[idx].dev;
973 if (is_mdev_switchdev_mode(dev) &&
974 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
975 mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
976 MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
977 mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
978 return true;
979
980 return false;
981 }
982
983 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
984 {
985 bool roce_lag = true;
986 int i;
987
988 mlx5_ldev_for_each(i, 0, ldev)
989 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
990
991 #ifdef CONFIG_MLX5_ESWITCH
992 mlx5_ldev_for_each(i, 0, ldev)
993 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
994 #endif
995
996 return roce_lag;
997 }
998
999 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
1000 {
1001 return do_bond && __mlx5_lag_is_active(ldev) &&
1002 ldev->mode != MLX5_LAG_MODE_MPESW;
1003 }
1004
1005 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
1006 {
1007 return !do_bond && __mlx5_lag_is_active(ldev) &&
1008 ldev->mode != MLX5_LAG_MODE_MPESW;
1009 }
1010
1011 #ifdef CONFIG_MLX5_ESWITCH
1012 static int
1013 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
1014 int (*get_speed)(struct mlx5_core_dev *, u32 *))
1015 {
1016 struct mlx5_core_dev *pf_mdev;
1017 int pf_idx;
1018 u32 speed;
1019 int ret;
1020
1021 *sum_speed = 0;
1022 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1023 pf_mdev = ldev->pf[pf_idx].dev;
1024 if (!pf_mdev)
1025 continue;
1026
1027 ret = get_speed(pf_mdev, &speed);
1028 if (ret) {
1029 mlx5_core_dbg(pf_mdev,
1030 "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
1031 get_speed, dev_name(pf_mdev->device),
1032 ret);
1033 return ret;
1034 }
1035
1036 *sum_speed += speed;
1037 }
1038
1039 return 0;
1040 }
1041
1042 static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
1043 {
1044 return mlx5_lag_sum_devices_speed(ldev, max_speed,
1045 mlx5_port_max_linkspeed);
1046 }
1047
1048 static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
1049 u32 *oper_speed)
1050 {
1051 return mlx5_lag_sum_devices_speed(ldev, oper_speed,
1052 mlx5_port_oper_linkspeed);
1053 }
1054
1055 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
1056 u32 speed)
1057 {
1058 u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT;
1059 struct mlx5_eswitch *esw = mdev->priv.eswitch;
1060 struct mlx5_vport *vport;
1061 unsigned long i;
1062 int ret;
1063
1064 if (!esw)
1065 return;
1066
1067 if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
1068 return;
1069
1070 mlx5_esw_for_each_vport(esw, i, vport) {
1071 if (!vport)
1072 continue;
1073
1074 if (vport->vport == MLX5_VPORT_UPLINK)
1075 continue;
1076
1077 ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod,
1078 vport->vport, true, speed);
1079 if (ret)
1080 mlx5_core_dbg(mdev,
1081 "Failed to set vport %d speed %d, err=%d\n",
1082 vport->vport, speed, ret);
1083 }
1084 }
1085
1086 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
1087 {
1088 struct mlx5_core_dev *mdev;
1089 u32 speed;
1090 int pf_idx;
1091
1092 if (ldev->mode == MLX5_LAG_MODE_MPESW) {
1093 if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
1094 return;
1095 } else {
1096 speed = ldev->tracker.bond_speed_mbps;
1097 if (speed == SPEED_UNKNOWN)
1098 return;
1099 }
1100
1101 /* If speed is not set, use the sum of max speeds of all PFs */
1102 if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
1103 return;
1104
1105 speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1106
1107 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1108 mdev = ldev->pf[pf_idx].dev;
1109 if (!mdev)
1110 continue;
1111
1112 mlx5_lag_modify_device_vports_speed(mdev, speed);
1113 }
1114 }
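/* Example of the flow above, assuming MLX5_MAX_TX_SPEED_UNIT is the Mbps
 * granularity expected by the vport max_tx_speed field: a 2x100G
 * active-active bond reporting bond_speed_mbps = 200000 is scaled by that
 * unit and then programmed as the max TX speed of every non-uplink vport on
 * every PF in the LAG; in MPESW mode the sum of the per-PF operational
 * speeds is used instead of the bond speed.
 */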
1115
1116 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
1117 {
1118 struct mlx5_core_dev *mdev;
1119 u32 speed;
1120 int pf_idx;
1121 int ret;
1122
1123 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1124 mdev = ldev->pf[pf_idx].dev;
1125 if (!mdev)
1126 continue;
1127
1128 ret = mlx5_port_oper_linkspeed(mdev, &speed);
1129 if (ret) {
1130 mlx5_core_dbg(mdev,
1131 "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
1132 dev_name(mdev->device), ret);
1133 continue;
1134 }
1135
1136 speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1137 mlx5_lag_modify_device_vports_speed(mdev, speed);
1138 }
1139 }
1140 #endif
1141
1142 static void mlx5_do_bond(struct mlx5_lag *ldev)
1143 {
1144 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1145 struct lag_tracker tracker = { };
1146 struct mlx5_core_dev *dev0;
1147 struct net_device *ndev;
1148 bool do_bond, roce_lag;
1149 int err;
1150 int i;
1151
1152 if (idx < 0)
1153 return;
1154
1155 dev0 = ldev->pf[idx].dev;
1156 if (!mlx5_lag_is_ready(ldev)) {
1157 do_bond = false;
1158 } else {
1159 /* VF LAG is in multipath mode, ignore bond change requests */
1160 if (mlx5_lag_is_multipath(dev0))
1161 return;
1162
1163 tracker = ldev->tracker;
1164
1165 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1166 }
1167
1168 if (do_bond && !__mlx5_lag_is_active(ldev)) {
1169 bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1170
1171 roce_lag = mlx5_lag_is_roce_lag(ldev);
1172
1173 if (shared_fdb || roce_lag)
1174 mlx5_lag_remove_devices(ldev);
1175
1176 err = mlx5_activate_lag(ldev, &tracker,
1177 roce_lag ? MLX5_LAG_MODE_ROCE :
1178 MLX5_LAG_MODE_SRIOV,
1179 shared_fdb);
1180 if (err) {
1181 if (shared_fdb || roce_lag)
1182 mlx5_lag_add_devices(ldev);
1183 if (shared_fdb) {
1184 mlx5_ldev_for_each(i, 0, ldev)
1185 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1186 }
1187
1188 return;
1189 } else if (roce_lag) {
1190 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1191 mlx5_rescan_drivers_locked(dev0);
1192 mlx5_ldev_for_each(i, idx + 1, ldev) {
1193 if (mlx5_get_roce_state(ldev->pf[i].dev))
1194 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
1195 }
1196 } else if (shared_fdb) {
1197 int i;
1198
1199 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1200 mlx5_rescan_drivers_locked(dev0);
1201
1202 mlx5_ldev_for_each(i, 0, ldev) {
1203 err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1204 if (err)
1205 break;
1206 }
1207
1208 if (err) {
1209 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1210 mlx5_rescan_drivers_locked(dev0);
1211 mlx5_deactivate_lag(ldev);
1212 mlx5_lag_add_devices(ldev);
1213 mlx5_ldev_for_each(i, 0, ldev)
1214 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1215 mlx5_core_err(dev0, "Failed to enable lag\n");
1216 return;
1217 }
1218 }
1219 if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1220 ndev = mlx5_lag_active_backup_get_netdev(dev0);
1221 /* Only SR-IOV and RoCE LAG should have tracker->tx_type
1222 * set, so there is no need to check the mode.
1223 */
1224 blocking_notifier_call_chain(&dev0->priv.lag_nh,
1225 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1226 ndev);
1227 dev_put(ndev);
1228 }
1229 mlx5_lag_set_vports_agg_speed(ldev);
1230 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1231 mlx5_modify_lag(ldev, &tracker);
1232 mlx5_lag_set_vports_agg_speed(ldev);
1233 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1234 mlx5_lag_reset_vports_speed(ldev);
1235 mlx5_disable_lag(ldev);
1236 }
1237 }
1238
1239 /* The last mdev to unregister will destroy the workqueue before removing the
1240 * devcom component, and as all the mdevs use the same devcom component we are
1241 * guaranteed that the devcom is valid while the calling work is running.
1242 */
1243 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1244 {
1245 struct mlx5_devcom_comp_dev *devcom = NULL;
1246 int i;
1247
1248 mutex_lock(&ldev->lock);
1249 i = mlx5_get_next_ldev_func(ldev, 0);
1250 if (i < MLX5_MAX_PORTS)
1251 devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
1252 mutex_unlock(&ldev->lock);
1253 return devcom;
1254 }
1255
1256 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1257 {
1258 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1259 }
1260
1261 static void mlx5_do_bond_work(struct work_struct *work)
1262 {
1263 struct delayed_work *delayed_work = to_delayed_work(work);
1264 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1265 bond_work);
1266 struct mlx5_devcom_comp_dev *devcom;
1267 int status;
1268
1269 devcom = mlx5_lag_get_devcom_comp(ldev);
1270 if (!devcom)
1271 return;
1272
1273 status = mlx5_devcom_comp_trylock(devcom);
1274 if (!status) {
1275 mlx5_queue_bond_work(ldev, HZ);
1276 return;
1277 }
1278
1279 mutex_lock(&ldev->lock);
1280 if (ldev->mode_changes_in_progress) {
1281 mutex_unlock(&ldev->lock);
1282 mlx5_devcom_comp_unlock(devcom);
1283 mlx5_queue_bond_work(ldev, HZ);
1284 return;
1285 }
1286
1287 mlx5_do_bond(ldev);
1288 mutex_unlock(&ldev->lock);
1289 mlx5_devcom_comp_unlock(devcom);
1290 }
1291
1292 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1293 struct lag_tracker *tracker,
1294 struct netdev_notifier_changeupper_info *info)
1295 {
1296 struct net_device *upper = info->upper_dev, *ndev_tmp;
1297 struct netdev_lag_upper_info *lag_upper_info = NULL;
1298 bool is_bonded, is_in_lag, mode_supported;
1299 bool has_inactive = 0;
1300 struct slave *slave;
1301 u8 bond_status = 0;
1302 int num_slaves = 0;
1303 int changed = 0;
1304 int i, idx = -1;
1305
1306 if (!netif_is_lag_master(upper))
1307 return 0;
1308
1309 if (info->linking)
1310 lag_upper_info = info->upper_info;
1311
1312 /* The event may still be of interest if the slave does not belong to
1313 * us, but is enslaved to a master which has one or more of our netdevs
1314 * as slaves (e.g., if a new slave is added to a master that bonds two
1315 * of our netdevs, we should unbond).
1316 */
1317 rcu_read_lock();
1318 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1319 mlx5_ldev_for_each(i, 0, ldev) {
1320 if (ldev->pf[i].netdev == ndev_tmp) {
1321 idx++;
1322 break;
1323 }
1324 }
1325 if (i < MLX5_MAX_PORTS) {
1326 slave = bond_slave_get_rcu(ndev_tmp);
1327 if (slave)
1328 has_inactive |= bond_is_slave_inactive(slave);
1329 bond_status |= (1 << idx);
1330 }
1331
1332 num_slaves++;
1333 }
1334 rcu_read_unlock();
1335
1336 /* None of this lagdev's netdevs are slaves of this master. */
1337 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1338 return 0;
1339
1340 if (lag_upper_info) {
1341 tracker->tx_type = lag_upper_info->tx_type;
1342 tracker->hash_type = lag_upper_info->hash_type;
1343 }
1344
1345 tracker->has_inactive = has_inactive;
1346 /* Determine bonding status:
1347 * A device is considered bonded if all of its physical ports are slaves
1348 * of the same LAG master, and only they are.
1349 */
1350 is_in_lag = num_slaves == ldev->ports &&
1351 bond_status == GENMASK(ldev->ports - 1, 0);
1352
1353 /* Lag mode must be activebackup or hash. */
1354 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1355 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1356
1357 is_bonded = is_in_lag && mode_supported;
1358 if (tracker->is_bonded != is_bonded) {
1359 tracker->is_bonded = is_bonded;
1360 changed = 1;
1361 }
1362
1363 if (!is_in_lag)
1364 return changed;
1365
1366 if (!mlx5_lag_is_ready(ldev))
1367 NL_SET_ERR_MSG_MOD(info->info.extack,
1368 "Can't activate LAG offload, PF is configured with more than 64 VFs");
1369 else if (!mode_supported)
1370 NL_SET_ERR_MSG_MOD(info->info.extack,
1371 "Can't activate LAG offload, TX type isn't supported");
1372
1373 return changed;
1374 }
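/* Example for the handler above on a 2-port device: when both PF netdevs (and
 * nothing else) are enslaved to the same bond, num_slaves == 2 and
 * bond_status == GENMASK(1, 0) == 0x3, so is_in_lag is true; adding a third,
 * foreign slave bumps num_slaves to 3 and keeps the bond from being
 * offloaded.
 */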
1375
1376 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1377 struct lag_tracker *tracker,
1378 struct net_device *ndev,
1379 struct netdev_notifier_changelowerstate_info *info)
1380 {
1381 struct netdev_lag_lower_state_info *lag_lower_info;
1382 int idx;
1383
1384 if (!netif_is_lag_port(ndev))
1385 return 0;
1386
1387 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1388 if (idx < 0)
1389 return 0;
1390
1391 /* This information is used to determine virtual to physical
1392 * port mapping.
1393 */
1394 lag_lower_info = info->lower_state_info;
1395 if (!lag_lower_info)
1396 return 0;
1397
1398 tracker->netdev_state[idx] = *lag_lower_info;
1399
1400 return 1;
1401 }
1402
1403 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1404 struct lag_tracker *tracker,
1405 struct net_device *ndev)
1406 {
1407 struct net_device *ndev_tmp;
1408 struct slave *slave;
1409 bool has_inactive = 0;
1410 int idx;
1411
1412 if (!netif_is_lag_master(ndev))
1413 return 0;
1414
1415 rcu_read_lock();
1416 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1417 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1418 if (idx < 0)
1419 continue;
1420
1421 slave = bond_slave_get_rcu(ndev_tmp);
1422 if (slave)
1423 has_inactive |= bond_is_slave_inactive(slave);
1424 }
1425 rcu_read_unlock();
1426
1427 if (tracker->has_inactive == has_inactive)
1428 return 0;
1429
1430 tracker->has_inactive = has_inactive;
1431
1432 return 1;
1433 }
1434
1435 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
1436 struct net_device *ndev)
1437 {
1438 struct ethtool_link_ksettings lksettings;
1439 struct net_device *bond_dev;
1440 int err;
1441
1442 if (netif_is_lag_master(ndev))
1443 bond_dev = ndev;
1444 else
1445 bond_dev = netdev_master_upper_dev_get(ndev);
1446
1447 if (!bond_dev) {
1448 tracker->bond_speed_mbps = SPEED_UNKNOWN;
1449 return;
1450 }
1451
1452 err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
1453 if (err) {
1454 netdev_dbg(bond_dev,
1455 "Failed to get speed for bond dev %s, err=%d\n",
1456 bond_dev->name, err);
1457 tracker->bond_speed_mbps = SPEED_UNKNOWN;
1458 return;
1459 }
1460
1461 if (lksettings.base.speed == SPEED_UNKNOWN)
1462 tracker->bond_speed_mbps = 0;
1463 else
1464 tracker->bond_speed_mbps = lksettings.base.speed;
1465 }
1466
1467 /* Returns speed in Mbps. */
1468 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
1469 {
1470 struct mlx5_lag *ldev;
1471 unsigned long flags;
1472 int ret = 0;
1473
1474 spin_lock_irqsave(&lag_lock, flags);
1475 ldev = mlx5_lag_dev(mdev);
1476 if (!ldev) {
1477 ret = -ENODEV;
1478 goto unlock;
1479 }
1480
1481 *speed = ldev->tracker.bond_speed_mbps;
1482
1483 if (*speed == SPEED_UNKNOWN) {
1484 mlx5_core_dbg(mdev, "Bond speed is unknown\n");
1485 ret = -EINVAL;
1486 }
1487
1488 unlock:
1489 spin_unlock_irqrestore(&lag_lock, flags);
1490 return ret;
1491 }
1492 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
1493
1494 /* This handler is always registered for netdev events. */
1495 static int mlx5_lag_netdev_event(struct notifier_block *this,
1496 unsigned long event, void *ptr)
1497 {
1498 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1499 struct lag_tracker tracker;
1500 struct mlx5_lag *ldev;
1501 int changed = 0;
1502
1503 if (event != NETDEV_CHANGEUPPER &&
1504 event != NETDEV_CHANGELOWERSTATE &&
1505 event != NETDEV_CHANGEINFODATA)
1506 return NOTIFY_DONE;
1507
1508 ldev = container_of(this, struct mlx5_lag, nb);
1509
1510 tracker = ldev->tracker;
1511
1512 switch (event) {
1513 case NETDEV_CHANGEUPPER:
1514 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1515 break;
1516 case NETDEV_CHANGELOWERSTATE:
1517 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1518 ndev, ptr);
1519 break;
1520 case NETDEV_CHANGEINFODATA:
1521 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1522 break;
1523 }
1524
1525 if (changed)
1526 mlx5_lag_update_tracker_speed(&tracker, ndev);
1527
1528 ldev->tracker = tracker;
1529
1530 if (changed)
1531 mlx5_queue_bond_work(ldev, 0);
1532
1533 return NOTIFY_DONE;
1534 }
1535
1536 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1537 struct mlx5_core_dev *dev,
1538 struct net_device *netdev)
1539 {
1540 unsigned int fn = mlx5_get_dev_index(dev);
1541 unsigned long flags;
1542
1543 spin_lock_irqsave(&lag_lock, flags);
1544 ldev->pf[fn].netdev = netdev;
1545 ldev->tracker.netdev_state[fn].link_up = 0;
1546 ldev->tracker.netdev_state[fn].tx_enabled = 0;
1547 spin_unlock_irqrestore(&lag_lock, flags);
1548 }
1549
1550 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1551 struct net_device *netdev)
1552 {
1553 unsigned long flags;
1554 int i;
1555
1556 spin_lock_irqsave(&lag_lock, flags);
1557 mlx5_ldev_for_each(i, 0, ldev) {
1558 if (ldev->pf[i].netdev == netdev) {
1559 ldev->pf[i].netdev = NULL;
1560 break;
1561 }
1562 }
1563 spin_unlock_irqrestore(&lag_lock, flags);
1564 }
1565
1566 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1567 struct mlx5_core_dev *dev)
1568 {
1569 unsigned int fn = mlx5_get_dev_index(dev);
1570
1571 ldev->pf[fn].dev = dev;
1572 dev->priv.lag = ldev;
1573
1574 MLX5_NB_INIT(&ldev->pf[fn].port_change_nb,
1575 mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
1576 mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb);
1577 }
1578
1579 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1580 struct mlx5_core_dev *dev)
1581 {
1582 int fn;
1583
1584 fn = mlx5_get_dev_index(dev);
1585 if (ldev->pf[fn].dev != dev)
1586 return;
1587
1588 if (ldev->pf[fn].port_change_nb.nb.notifier_call)
1589 mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb);
1590
1591 ldev->pf[fn].dev = NULL;
1592 dev->priv.lag = NULL;
1593 }
1594
1595 /* Must be called with HCA devcom component lock held */
1596 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1597 {
1598 struct mlx5_devcom_comp_dev *pos = NULL;
1599 struct mlx5_lag *ldev = NULL;
1600 struct mlx5_core_dev *tmp_dev;
1601
1602 tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
1603 if (tmp_dev)
1604 ldev = mlx5_lag_dev(tmp_dev);
1605
1606 if (!ldev) {
1607 ldev = mlx5_lag_dev_alloc(dev);
1608 if (!ldev) {
1609 mlx5_core_err(dev, "Failed to alloc lag dev\n");
1610 return 0;
1611 }
1612 mlx5_ldev_add_mdev(ldev, dev);
1613 return 0;
1614 }
1615
1616 mutex_lock(&ldev->lock);
1617 if (ldev->mode_changes_in_progress) {
1618 mutex_unlock(&ldev->lock);
1619 return -EAGAIN;
1620 }
1621 mlx5_ldev_get(ldev);
1622 mlx5_ldev_add_mdev(ldev, dev);
1623 mutex_unlock(&ldev->lock);
1624
1625 return 0;
1626 }
1627
1628 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
1629 {
1630 mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
1631 dev->priv.hca_devcom_comp = NULL;
1632 }
1633
1634 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
1635 {
1636 struct mlx5_devcom_match_attr attr = {
1637 .flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
1638 .net = mlx5_core_net(dev),
1639 };
1640 u8 len __always_unused;
1641
1642 mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
1643
1644 /* This component is used to sync adding core_dev to lag_dev and to sync
1645 * changes of mlx5_adev_devices between the LAG layer and other layers.
1646 */
1647 dev->priv.hca_devcom_comp =
1648 mlx5_devcom_register_component(dev->priv.devc,
1649 MLX5_DEVCOM_HCA_PORTS,
1650 &attr, NULL, dev);
1651 if (!dev->priv.hca_devcom_comp) {
1652 mlx5_core_err(dev,
1653 "Failed to register devcom HCA component.");
1654 return -EINVAL;
1655 }
1656
1657 return 0;
1658 }
1659
1660 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1661 {
1662 struct mlx5_lag *ldev;
1663
1664 ldev = mlx5_lag_dev(dev);
1665 if (!ldev)
1666 return;
1667
1668 /* mdev is being removed, might as well remove debugfs
1669 * as early as possible.
1670 */
1671 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1672 recheck:
1673 mutex_lock(&ldev->lock);
1674 if (ldev->mode_changes_in_progress) {
1675 mutex_unlock(&ldev->lock);
1676 msleep(100);
1677 goto recheck;
1678 }
1679 mlx5_ldev_remove_mdev(ldev, dev);
1680 mutex_unlock(&ldev->lock);
1681 mlx5_lag_unregister_hca_devcom_comp(dev);
1682 mlx5_ldev_put(ldev);
1683 }
1684
1685 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1686 {
1687 int err;
1688
1689 if (!mlx5_lag_is_supported(dev))
1690 return;
1691
1692 if (mlx5_lag_register_hca_devcom_comp(dev))
1693 return;
1694
1695 recheck:
1696 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1697 err = __mlx5_lag_dev_add_mdev(dev);
1698 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1699
1700 if (err) {
1701 msleep(100);
1702 goto recheck;
1703 }
1704 mlx5_ldev_add_debugfs(dev);
1705 }
1706
1707 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1708 struct net_device *netdev)
1709 {
1710 struct mlx5_lag *ldev;
1711 bool lag_is_active;
1712
1713 ldev = mlx5_lag_dev(dev);
1714 if (!ldev)
1715 return;
1716
1717 mutex_lock(&ldev->lock);
1718 mlx5_ldev_remove_netdev(ldev, netdev);
1719 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1720
1721 lag_is_active = __mlx5_lag_is_active(ldev);
1722 mutex_unlock(&ldev->lock);
1723
1724 if (lag_is_active)
1725 mlx5_queue_bond_work(ldev, 0);
1726 }
1727
1728 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1729 struct net_device *netdev)
1730 {
1731 struct mlx5_lag *ldev;
1732 int num = 0;
1733
1734 ldev = mlx5_lag_dev(dev);
1735 if (!ldev)
1736 return;
1737
1738 mutex_lock(&ldev->lock);
1739 mlx5_ldev_add_netdev(ldev, dev, netdev);
1740 num = mlx5_lag_num_netdevs(ldev);
1741 if (num >= ldev->ports)
1742 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1743 mutex_unlock(&ldev->lock);
1744 mlx5_queue_bond_work(ldev, 0);
1745 }
1746
1747 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
1748 {
1749 int i;
1750
1751 for (i = start_idx; i >= end_idx; i--)
1752 if (ldev->pf[i].dev)
1753 return i;
1754 return -1;
1755 }
1756
1757 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
1758 {
1759 int i;
1760
1761 for (i = start_idx; i < MLX5_MAX_PORTS; i++)
1762 if (ldev->pf[i].dev)
1763 return i;
1764 return MLX5_MAX_PORTS;
1765 }
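/* These two helpers appear to back the mlx5_ldev_for_each() and
 * mlx5_ldev_for_each_reverse() iterators used throughout this file: each call
 * returns the next (or previous) pf[] slot that has a core device bound,
 * with MLX5_MAX_PORTS (forward) or -1 (reverse) marking the end of the walk.
 */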
1766
1767 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1768 {
1769 struct mlx5_lag *ldev;
1770 unsigned long flags;
1771 bool res;
1772
1773 spin_lock_irqsave(&lag_lock, flags);
1774 ldev = mlx5_lag_dev(dev);
1775 res = ldev && __mlx5_lag_is_roce(ldev);
1776 spin_unlock_irqrestore(&lag_lock, flags);
1777
1778 return res;
1779 }
1780 EXPORT_SYMBOL(mlx5_lag_is_roce);
1781
1782 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1783 {
1784 struct mlx5_lag *ldev;
1785 unsigned long flags;
1786 bool res;
1787
1788 spin_lock_irqsave(&lag_lock, flags);
1789 ldev = mlx5_lag_dev(dev);
1790 res = ldev && __mlx5_lag_is_active(ldev);
1791 spin_unlock_irqrestore(&lag_lock, flags);
1792
1793 return res;
1794 }
1795 EXPORT_SYMBOL(mlx5_lag_is_active);
1796
1797 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1798 {
1799 struct mlx5_lag *ldev;
1800 unsigned long flags;
1801 bool res = 0;
1802
1803 spin_lock_irqsave(&lag_lock, flags);
1804 ldev = mlx5_lag_dev(dev);
1805 if (ldev)
1806 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1807 spin_unlock_irqrestore(&lag_lock, flags);
1808
1809 return res;
1810 }
1811 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1812
1813 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1814 {
1815 struct mlx5_lag *ldev;
1816 unsigned long flags;
1817 bool res = false;
1818 int idx;
1819
1820 spin_lock_irqsave(&lag_lock, flags);
1821 ldev = mlx5_lag_dev(dev);
1822 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1823 res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
1824 spin_unlock_irqrestore(&lag_lock, flags);
1825
1826 return res;
1827 }
1828 EXPORT_SYMBOL(mlx5_lag_is_master);
1829
1830 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1831 {
1832 struct mlx5_lag *ldev;
1833 unsigned long flags;
1834 bool res;
1835
1836 spin_lock_irqsave(&lag_lock, flags);
1837 ldev = mlx5_lag_dev(dev);
1838 res = ldev && __mlx5_lag_is_sriov(ldev);
1839 spin_unlock_irqrestore(&lag_lock, flags);
1840
1841 return res;
1842 }
1843 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1844
1845 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1846 {
1847 struct mlx5_lag *ldev;
1848 unsigned long flags;
1849 bool res;
1850
1851 spin_lock_irqsave(&lag_lock, flags);
1852 ldev = mlx5_lag_dev(dev);
1853 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1854 spin_unlock_irqrestore(&lag_lock, flags);
1855
1856 return res;
1857 }
1858 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1859
1860 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1861 {
1862 struct mlx5_lag *ldev;
1863
1864 ldev = mlx5_lag_dev(dev);
1865 if (!ldev)
1866 return;
1867
1868 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1869 mutex_lock(&ldev->lock);
1870
1871 ldev->mode_changes_in_progress++;
1872 if (__mlx5_lag_is_active(ldev)) {
1873 if (ldev->mode == MLX5_LAG_MODE_MPESW)
1874 mlx5_lag_disable_mpesw(ldev);
1875 else
1876 mlx5_disable_lag(ldev);
1877 }
1878
1879 mutex_unlock(&ldev->lock);
1880 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1881 }
1882
1883 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1884 {
1885 struct mlx5_lag *ldev;
1886
1887 ldev = mlx5_lag_dev(dev);
1888 if (!ldev)
1889 return;
1890
1891 mutex_lock(&ldev->lock);
1892 ldev->mode_changes_in_progress--;
1893 mutex_unlock(&ldev->lock);
1894 mlx5_queue_bond_work(ldev, 0);
1895 }
1896
1897 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1898 struct net_device *slave)
1899 {
1900 struct mlx5_lag *ldev;
1901 unsigned long flags;
1902 u8 port = 0;
1903 int i;
1904
1905 spin_lock_irqsave(&lag_lock, flags);
1906 ldev = mlx5_lag_dev(dev);
1907 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1908 goto unlock;
1909
1910 mlx5_ldev_for_each(i, 0, ldev) {
1911 if (ldev->pf[i].netdev == slave) {
1912 port = i;
1913 break;
1914 }
1915 }
1916
1917 port = ldev->v2p_map[port * ldev->buckets];
1918
1919 unlock:
1920 spin_unlock_irqrestore(&lag_lock, flags);
1921 return port;
1922 }
1923 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1924
1925 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1926 {
1927 struct mlx5_lag *ldev;
1928
1929 ldev = mlx5_lag_dev(dev);
1930 if (!ldev)
1931 return 0;
1932
1933 return ldev->ports;
1934 }
1935 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1936
1937 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
1938 {
1939 struct mlx5_core_dev *peer_dev = NULL;
1940 struct mlx5_lag *ldev;
1941 unsigned long flags;
1942 int idx;
1943
1944 spin_lock_irqsave(&lag_lock, flags);
1945 ldev = mlx5_lag_dev(dev);
1946 if (!ldev)
1947 goto unlock;
1948
1949 if (*i == MLX5_MAX_PORTS)
1950 goto unlock;
1951 mlx5_ldev_for_each(idx, *i, ldev)
1952 if (ldev->pf[idx].dev != dev)
1953 break;
1954
1955 if (idx == MLX5_MAX_PORTS) {
1956 *i = idx;
1957 goto unlock;
1958 }
1959 *i = idx + 1;
1960
1961 peer_dev = ldev->pf[idx].dev;
1962
1963 unlock:
1964 spin_unlock_irqrestore(&lag_lock, flags);
1965 return peer_dev;
1966 }
1967 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
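/* A caller would typically walk all peers with something like the sketch
 * below, where use_peer() stands in for the caller's per-peer work; *i is the
 * cursor and the function returns NULL (leaving *i at MLX5_MAX_PORTS) once
 * every other device in the LAG has been visited:
 *
 *	int i = 0;
 *	struct mlx5_core_dev *peer;
 *
 *	while ((peer = mlx5_lag_get_next_peer_mdev(dev, &i)))
 *		use_peer(peer);
 */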
1968
1969 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1970 u64 *values,
1971 int num_counters,
1972 size_t *offsets)
1973 {
1974 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1975 struct mlx5_core_dev **mdev;
1976 int ret = 0, i, j, idx = 0;
1977 struct mlx5_lag *ldev;
1978 unsigned long flags;
1979 int num_ports;
1980 void *out;
1981
1982 out = kvzalloc(outlen, GFP_KERNEL);
1983 if (!out)
1984 return -ENOMEM;
1985
1986 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1987 if (!mdev) {
1988 ret = -ENOMEM;
1989 goto free_out;
1990 }
1991
1992 memset(values, 0, sizeof(*values) * num_counters);
1993
1994 spin_lock_irqsave(&lag_lock, flags);
1995 ldev = mlx5_lag_dev(dev);
1996 if (ldev && __mlx5_lag_is_active(ldev)) {
1997 num_ports = ldev->ports;
1998 mlx5_ldev_for_each(i, 0, ldev)
1999 mdev[idx++] = ldev->pf[i].dev;
2000 } else {
2001 num_ports = 1;
2002 mdev[MLX5_LAG_P1] = dev;
2003 }
2004 spin_unlock_irqrestore(&lag_lock, flags);
2005
2006 for (i = 0; i < num_ports; ++i) {
2007 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
2008
2009 MLX5_SET(query_cong_statistics_in, in, opcode,
2010 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
2011 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
2012 out);
2013 if (ret)
2014 goto free_mdev;
2015
2016 for (j = 0; j < num_counters; ++j)
2017 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
2018 }
2019
2020 free_mdev:
2021 kvfree(mdev);
2022 free_out:
2023 kvfree(out);
2024 return ret;
2025 }
2026 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
2027