1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include <linux/mlx5/lag.h>
39 #include "lib/mlx5.h"
40 #include "lib/devcom.h"
41 #include "mlx5_core.h"
42 #include "eswitch.h"
43 #include "esw/acl/ofld.h"
44 #include "lag.h"
45 #include "mp.h"
46 #include "mpesw.h"
47
48
/* General purpose, use for short periods of time.
 * Beware of lock dependencies (preferably, no locks should be acquired
 * under it).
 *
 * In this file it is taken with spin_lock_irqsave() around the ldev lookup
 * and netdev selection in mlx5_lag_active_backup_get_netdev().
 */
static DEFINE_SPINLOCK(lag_lock);
54
get_port_sel_mode(enum mlx5_lag_mode mode,unsigned long flags)55 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
56 {
57 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
58 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
59
60 if (mode == MLX5_LAG_MODE_MPESW)
61 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
62
63 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
64 }
65
lag_active_port_bits(struct mlx5_lag * ldev)66 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
67 {
68 u8 enabled_ports[MLX5_MAX_PORTS] = {};
69 u8 active_port = 0;
70 int num_enabled;
71 int idx;
72
73 mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
74 &num_enabled);
75 for (idx = 0; idx < num_enabled; idx++)
76 active_port |= BIT_MASK(enabled_ports[idx]);
77
78 return active_port;
79 }
80
mlx5_cmd_create_lag(struct mlx5_core_dev * dev,struct mlx5_lag * ldev,int mode,unsigned long flags)81 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
82 int mode, unsigned long flags)
83 {
84 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
85 &flags);
86 int port_sel_mode = get_port_sel_mode(mode, flags);
87 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
88 u8 *ports = ldev->v2p_map;
89 int idx0, idx1;
90 void *lag_ctx;
91
92 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
93 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
94 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
95 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
96 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
97
98 if (idx0 < 0 || idx1 < 0)
99 return -EINVAL;
100
101 switch (port_sel_mode) {
102 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
103 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
104 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
105 break;
106 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
107 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
108 break;
109
110 MLX5_SET(lagc, lag_ctx, active_port,
111 lag_active_port_bits(mlx5_lag_dev(dev)));
112 break;
113 default:
114 break;
115 }
116 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
117
118 return mlx5_cmd_exec_in(dev, create_lag, in);
119 }
120
/* Issue MODIFY_LAG on @dev to update the two tx remap affinities.
 *
 * @ports is indexed by the LAG device index; the entries used are those of
 * the first two devices in sequence order. Returns -EINVAL if either of
 * them cannot be resolved, otherwise the command execution result.
 */
static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
			       u8 *ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	int first = mlx5_lag_get_dev_index_by_seq(ldev, 0);
	int second = mlx5_lag_get_dev_index_by_seq(ldev, 1);
	void *lag_context;

	if (first < 0 || second < 0)
		return -EINVAL;

	lag_context = MLX5_ADDR_OF(modify_lag_in, in, ctx);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_1, ports[first]);
	MLX5_SET(lagc, lag_context, tx_remap_affinity_2, ports[second]);

	/* NOTE(review): field_select 0x1 presumably selects the tx remap
	 * affinity fields - confirm against the device interface spec.
	 */
	MLX5_SET(modify_lag_in, in, field_select, 0x1);
	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}
141
mlx5_cmd_create_vport_lag(struct mlx5_core_dev * dev)142 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
143 {
144 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
145
146 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
147
148 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
149 }
150 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
151
mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev * dev)152 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
153 {
154 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
155
156 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
157
158 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
159 }
160 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
161
/* Collect the LAG device indices whose tracked netdev is not usable for
 * transmit, i.e. tx is disabled or the link is down.
 *
 * The indices are stored in @ports and their count in @num_disabled.
 */
static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
				   u8 *ports, int *num_disabled)
{
	int i;

	*num_disabled = 0;
	mlx5_ldev_for_each(i, 0, ldev) {
		bool usable = tracker->netdev_state[i].tx_enabled &&
			      tracker->netdev_state[i].link_up;

		if (usable)
			continue;

		ports[*num_disabled] = i;
		*num_disabled += 1;
	}
}
173
/* Collect the LAG device indices whose tracked netdev has tx enabled and
 * link up. The indices go to @ports and their count to @num_enabled.
 *
 * When no device qualifies, the output is filled with the disabled devices
 * instead (see mlx5_infer_tx_disabled()).
 */
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
			   u8 *ports, int *num_enabled)
{
	int i;

	*num_enabled = 0;
	mlx5_ldev_for_each(i, 0, ldev) {
		if (!tracker->netdev_state[i].tx_enabled)
			continue;
		if (!tracker->netdev_state[i].link_up)
			continue;

		ports[*num_enabled] = i;
		*num_enabled += 1;
	}

	if (*num_enabled)
		return;

	mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
}
188
mlx5_lag_print_mapping(struct mlx5_core_dev * dev,struct mlx5_lag * ldev,struct lag_tracker * tracker,unsigned long flags)189 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
190 struct mlx5_lag *ldev,
191 struct lag_tracker *tracker,
192 unsigned long flags)
193 {
194 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
195 u8 enabled_ports[MLX5_MAX_PORTS] = {};
196 int written = 0;
197 int num_enabled;
198 int idx;
199 int err;
200 int i;
201 int j;
202
203 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
204 mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
205 &num_enabled);
206 for (i = 0; i < num_enabled; i++) {
207 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
208 if (err != 3)
209 return;
210 written += err;
211 }
212 buf[written - 2] = 0;
213 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
214 } else {
215 mlx5_ldev_for_each(i, 0, ldev) {
216 for (j = 0; j < ldev->buckets; j++) {
217 idx = i * ldev->buckets + j;
218 err = scnprintf(buf + written, 10,
219 " port %d:%d", i + 1, ldev->v2p_map[idx]);
220 if (err != 9)
221 return;
222 written += err;
223 }
224 }
225 mlx5_core_info(dev, "lag map:%s\n", buf);
226 }
227 }
228
/* Forward declarations: both handlers are installed by mlx5_lag_dev_alloc()
 * (netdev notifier callback and delayed bond work) ahead of their
 * definitions.
 */
static int mlx5_lag_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr);
static void mlx5_do_bond_work(struct work_struct *work);
232
mlx5_ldev_free(struct kref * ref)233 static void mlx5_ldev_free(struct kref *ref)
234 {
235 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
236 struct lag_func *pf;
237 struct net *net;
238 int i;
239
240 if (ldev->nb.notifier_call) {
241 net = read_pnet(&ldev->net);
242 unregister_netdevice_notifier_net(net, &ldev->nb);
243 }
244
245 mlx5_ldev_for_each(i, 0, ldev) {
246 pf = mlx5_lag_pf(ldev, i);
247 if (pf->port_change_nb.nb.notifier_call) {
248 struct mlx5_nb *nb = &pf->port_change_nb;
249
250 mlx5_eq_notifier_unregister(pf->dev, nb);
251 }
252 xa_erase(&ldev->pfs, i);
253 kfree(pf);
254 }
255 xa_destroy(&ldev->pfs);
256
257 mlx5_lag_mp_cleanup(ldev);
258 cancel_delayed_work_sync(&ldev->bond_work);
259 cancel_work_sync(&ldev->speed_update_work);
260 destroy_workqueue(ldev->wq);
261 mutex_destroy(&ldev->lock);
262 kfree(ldev);
263 }
264
mlx5_ldev_put(struct mlx5_lag * ldev)265 static void mlx5_ldev_put(struct mlx5_lag *ldev)
266 {
267 kref_put(&ldev->ref, mlx5_ldev_free);
268 }
269
mlx5_ldev_get(struct mlx5_lag * ldev)270 static void mlx5_ldev_get(struct mlx5_lag *ldev)
271 {
272 kref_get(&ldev->ref);
273 }
274
mlx5_lag_dev_alloc(struct mlx5_core_dev * dev)275 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
276 {
277 struct mlx5_lag *ldev;
278 int err;
279
280 ldev = kzalloc_obj(*ldev);
281 if (!ldev)
282 return NULL;
283
284 ldev->wq = create_singlethread_workqueue("mlx5_lag");
285 if (!ldev->wq) {
286 kfree(ldev);
287 return NULL;
288 }
289
290 kref_init(&ldev->ref);
291 mutex_init(&ldev->lock);
292 xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC);
293 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
294 INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
295
296 ldev->nb.notifier_call = mlx5_lag_netdev_event;
297 write_pnet(&ldev->net, mlx5_core_net(dev));
298 if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
299 ldev->nb.notifier_call = NULL;
300 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
301 }
302 ldev->mode = MLX5_LAG_MODE_NONE;
303
304 err = mlx5_lag_mp_init(ldev);
305 if (err)
306 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
307 err);
308
309 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
310 ldev->buckets = 1;
311
312 return ldev;
313 }
314
mlx5_lag_dev_get_netdev_idx(struct mlx5_lag * ldev,struct net_device * ndev)315 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
316 struct net_device *ndev)
317 {
318 struct lag_func *pf;
319 int i;
320
321 mlx5_ldev_for_each(i, 0, ldev) {
322 pf = mlx5_lag_pf(ldev, i);
323 if (pf->netdev == ndev)
324 return i;
325 }
326
327 return -ENOENT;
328 }
329
mlx5_lag_get_master_idx(struct mlx5_lag * ldev)330 static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev)
331 {
332 unsigned long idx = 0;
333 void *entry;
334
335 if (!ldev)
336 return -ENOENT;
337
338 entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
339 if (!entry)
340 return -ENOENT;
341
342 return (int)idx;
343 }
344
mlx5_lag_get_dev_index_by_seq(struct mlx5_lag * ldev,int seq)345 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
346 {
347 int master_idx, i, num = 0;
348
349 if (!ldev)
350 return -ENOENT;
351
352 master_idx = mlx5_lag_get_master_idx(ldev);
353
354 /* If seq 0 is requested and there's a primary PF, return it */
355 if (master_idx >= 0) {
356 if (seq == 0)
357 return master_idx;
358 num++;
359 }
360
361 mlx5_ldev_for_each(i, 0, ldev) {
362 /* Skip the primary PF in the loop */
363 if (i == master_idx)
364 continue;
365
366 if (num == seq)
367 return i;
368 num++;
369 }
370 return -ENOENT;
371 }
372
373 /* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its
374 * sequence number in the LAG. Master is always 0, others numbered
375 * sequentially starting from 1.
376 */
mlx5_lag_get_dev_seq(struct mlx5_core_dev * dev)377 int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev)
378 {
379 struct mlx5_lag *ldev = mlx5_lag_dev(dev);
380 int master_idx, i, num = 1;
381 struct lag_func *pf;
382
383 if (!ldev)
384 return -ENOENT;
385
386 master_idx = mlx5_lag_get_master_idx(ldev);
387 if (master_idx < 0)
388 return -ENOENT;
389
390 pf = mlx5_lag_pf(ldev, master_idx);
391 if (pf && pf->dev == dev)
392 return 0;
393
394 mlx5_ldev_for_each(i, 0, ldev) {
395 if (i == master_idx)
396 continue;
397 pf = mlx5_lag_pf(ldev, i);
398 if (pf->dev == dev)
399 return num;
400 num++;
401 }
402 return -ENOENT;
403 }
404 EXPORT_SYMBOL(mlx5_lag_get_dev_seq);
405
/* Devcom events for LAG master marking.
 * Handled by mlx5_lag_devcom_event(): PAIR may set the master mark,
 * UNPAIR clears it.
 */
#define LAG_DEVCOM_PAIR (0)
#define LAG_DEVCOM_UNPAIR (1)
409
mlx5_lag_mark_master(struct mlx5_lag * ldev)410 static void mlx5_lag_mark_master(struct mlx5_lag *ldev)
411 {
412 int lowest_dev_idx = INT_MAX;
413 struct lag_func *pf;
414 int master_xa_idx = -1;
415 int dev_idx;
416 int i;
417
418 mlx5_ldev_for_each(i, 0, ldev) {
419 pf = mlx5_lag_pf(ldev, i);
420 dev_idx = mlx5_get_dev_index(pf->dev);
421 if (dev_idx < lowest_dev_idx) {
422 lowest_dev_idx = dev_idx;
423 master_xa_idx = i;
424 }
425 }
426
427 if (master_xa_idx >= 0)
428 xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER);
429 }
430
mlx5_lag_clear_master(struct mlx5_lag * ldev)431 static void mlx5_lag_clear_master(struct mlx5_lag *ldev)
432 {
433 unsigned long idx = 0;
434 void *entry;
435
436 entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER);
437 if (!entry)
438 return;
439
440 xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER);
441 }
442
443 /* Devcom event handler to manage LAG master marking */
mlx5_lag_devcom_event(int event,void * my_data,void * event_data)444 static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data)
445 {
446 struct mlx5_core_dev *dev = my_data;
447 struct mlx5_lag *ldev;
448 int idx;
449
450 ldev = mlx5_lag_dev(dev);
451 if (!ldev)
452 return 0;
453
454 mutex_lock(&ldev->lock);
455 switch (event) {
456 case LAG_DEVCOM_PAIR:
457 /* No need to mark more than once */
458 idx = mlx5_lag_get_master_idx(ldev);
459 if (idx >= 0)
460 break;
461 /* Check if all LAG ports are now registered */
462 if (mlx5_lag_num_devs(ldev) == ldev->ports)
463 mlx5_lag_mark_master(ldev);
464 break;
465
466 case LAG_DEVCOM_UNPAIR:
467 /* Clear master mark when a device is removed */
468 mlx5_lag_clear_master(ldev);
469 break;
470 }
471 mutex_unlock(&ldev->lock);
472 return 0;
473 }
474
mlx5_lag_num_devs(struct mlx5_lag * ldev)475 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
476 {
477 int i, num = 0;
478
479 if (!ldev)
480 return 0;
481
482 mlx5_ldev_for_each(i, 0, ldev) {
483 (void)i;
484 num++;
485 }
486 return num;
487 }
488
mlx5_lag_num_netdevs(struct mlx5_lag * ldev)489 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
490 {
491 struct lag_func *pf;
492 int i, num = 0;
493
494 if (!ldev)
495 return 0;
496
497 mlx5_ldev_for_each(i, 0, ldev) {
498 pf = mlx5_lag_pf(ldev, i);
499 if (pf->netdev)
500 num++;
501 }
502 return num;
503 }
504
__mlx5_lag_is_roce(struct mlx5_lag * ldev)505 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
506 {
507 return ldev->mode == MLX5_LAG_MODE_ROCE;
508 }
509
__mlx5_lag_is_sriov(struct mlx5_lag * ldev)510 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
511 {
512 return ldev->mode == MLX5_LAG_MODE_SRIOV;
513 }
514
515 /* Create a mapping between steering slots and active ports.
516 * As we have ldev->buckets slots per port first assume the native
517 * mapping should be used.
518 * If there are ports that are disabled fill the relevant slots
519 * with mapping that points to active ports.
520 */
mlx5_infer_tx_affinity_mapping(struct lag_tracker * tracker,struct mlx5_lag * ldev,u8 buckets,u8 * ports)521 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
522 struct mlx5_lag *ldev,
523 u8 buckets,
524 u8 *ports)
525 {
526 int disabled[MLX5_MAX_PORTS] = {};
527 int enabled[MLX5_MAX_PORTS] = {};
528 int disabled_ports_num = 0;
529 int enabled_ports_num = 0;
530 int idx;
531 u32 rand;
532 int i;
533 int j;
534
535 mlx5_ldev_for_each(i, 0, ldev) {
536 if (tracker->netdev_state[i].tx_enabled &&
537 tracker->netdev_state[i].link_up)
538 enabled[enabled_ports_num++] = i;
539 else
540 disabled[disabled_ports_num++] = i;
541 }
542
543 /* Use native mapping by default where each port's buckets
544 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
545 * ports[] values are 1-indexed device indices for FW.
546 */
547 mlx5_ldev_for_each(i, 0, ldev) {
548 for (j = 0; j < buckets; j++) {
549 idx = i * buckets + j;
550 ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1;
551 }
552 }
553
554 /* If all ports are disabled/enabled keep native mapping */
555 if (enabled_ports_num == ldev->ports ||
556 disabled_ports_num == ldev->ports)
557 return;
558
559 /* Go over the disabled ports and for each assign a random active port */
560 for (i = 0; i < disabled_ports_num; i++) {
561 for (j = 0; j < buckets; j++) {
562 int rand_xa_idx;
563
564 get_random_bytes(&rand, 4);
565 rand_xa_idx = enabled[rand % enabled_ports_num];
566 ports[disabled[i] * buckets + j] =
567 mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1;
568 }
569 }
570 }
571
mlx5_lag_has_drop_rule(struct mlx5_lag * ldev)572 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
573 {
574 struct lag_func *pf;
575 int i;
576
577 mlx5_ldev_for_each(i, 0, ldev) {
578 pf = mlx5_lag_pf(ldev, i);
579 if (pf->has_drop)
580 return true;
581 }
582 return false;
583 }
584
mlx5_lag_drop_rule_cleanup(struct mlx5_lag * ldev)585 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
586 {
587 struct lag_func *pf;
588 int i;
589
590 mlx5_ldev_for_each(i, 0, ldev) {
591 pf = mlx5_lag_pf(ldev, i);
592 if (!pf->has_drop)
593 continue;
594
595 mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch,
596 MLX5_VPORT_UPLINK);
597 pf->has_drop = false;
598 }
599 }
600
mlx5_lag_drop_rule_setup(struct mlx5_lag * ldev,struct lag_tracker * tracker)601 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
602 struct lag_tracker *tracker)
603 {
604 u8 disabled_ports[MLX5_MAX_PORTS] = {};
605 struct mlx5_core_dev *dev;
606 struct lag_func *pf;
607 int disabled_index;
608 int num_disabled;
609 int err;
610 int i;
611
612 /* First delete the current drop rule so there won't be any dropped
613 * packets
614 */
615 mlx5_lag_drop_rule_cleanup(ldev);
616
617 if (!ldev->tracker.has_inactive)
618 return;
619
620 mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
621
622 for (i = 0; i < num_disabled; i++) {
623 disabled_index = disabled_ports[i];
624 pf = mlx5_lag_pf(ldev, disabled_index);
625 dev = pf->dev;
626 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
627 MLX5_VPORT_UPLINK);
628 if (!err)
629 pf->has_drop = true;
630 else
631 mlx5_core_err(dev,
632 "Failed to create lag drop rule, error: %d", err);
633 }
634 }
635
/* Issue MODIFY_LAG on @dev to update the active_port field of the LAG
 * context with the @ports bitmask.
 */
static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
{
	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
	void *lag_context = MLX5_ADDR_OF(modify_lag_in, in, ctx);

	MLX5_SET(lagc, lag_context, active_port, ports);

	/* NOTE(review): field_select 0x2 presumably selects active_port -
	 * confirm against the device interface spec.
	 */
	MLX5_SET(modify_lag_in, in, field_select, 0x2);
	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);

	return mlx5_cmd_exec_in(dev, modify_lag, in);
}
650
/* Push a new port mapping to the device.
 *
 * In hash-based mode the port selection tables are modified and, when the
 * first device reports port_select_flow_table_bypass, the active port
 * bitmask is updated as well. Otherwise the tx remap affinities are
 * updated through MODIFY_LAG.
 *
 * Returns -EINVAL when the first device cannot be resolved.
 */
static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
{
	int first = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	struct mlx5_core_dev *dev0;
	int err;

	if (first < 0)
		return -EINVAL;

	dev0 = mlx5_lag_pf(ldev, first)->dev;
	if (!test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags))
		return mlx5_cmd_modify_lag(dev0, ldev, ports);

	err = mlx5_lag_port_sel_modify(ldev, ports);
	if (err)
		return err;

	if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
		return 0;

	return mlx5_cmd_modify_active_port(dev0, lag_active_port_bits(ldev));
}
674
mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev * dev)675 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
676 {
677 struct net_device *ndev = NULL;
678 struct lag_func *pf;
679 struct mlx5_lag *ldev;
680 unsigned long flags;
681 int i, last_idx;
682
683 spin_lock_irqsave(&lag_lock, flags);
684 ldev = mlx5_lag_dev(dev);
685
686 if (!ldev)
687 goto unlock;
688
689 mlx5_ldev_for_each(i, 0, ldev) {
690 pf = mlx5_lag_pf(ldev, i);
691 if (ldev->tracker.netdev_state[i].tx_enabled)
692 ndev = pf->netdev;
693 }
694 if (!ndev) {
695 last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
696 if (last_idx < 0)
697 goto unlock;
698 pf = mlx5_lag_pf(ldev, last_idx);
699 ndev = pf->netdev;
700 }
701
702 dev_hold(ndev);
703
704 unlock:
705 spin_unlock_irqrestore(&lag_lock, flags);
706
707 return ndev;
708 }
709
mlx5_modify_lag(struct mlx5_lag * ldev,struct lag_tracker * tracker)710 void mlx5_modify_lag(struct mlx5_lag *ldev,
711 struct lag_tracker *tracker)
712 {
713 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
714 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
715 struct mlx5_core_dev *dev0;
716 int idx;
717 int err;
718 int i;
719 int j;
720
721 if (first_idx < 0)
722 return;
723
724 dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
725 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
726
727 mlx5_ldev_for_each(i, 0, ldev) {
728 for (j = 0; j < ldev->buckets; j++) {
729 idx = i * ldev->buckets + j;
730 if (ports[idx] == ldev->v2p_map[idx])
731 continue;
732 err = _mlx5_modify_lag(ldev, ports);
733 if (err) {
734 mlx5_core_err(dev0,
735 "Failed to modify LAG (%d)\n",
736 err);
737 return;
738 }
739 memcpy(ldev->v2p_map, ports, sizeof(ports));
740
741 mlx5_lag_print_mapping(dev0, ldev, tracker,
742 ldev->mode_flags);
743 break;
744 }
745 }
746
747 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
748 struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
749
750 if(!(ldev->mode == MLX5_LAG_MODE_ROCE))
751 mlx5_lag_drop_rule_setup(ldev, tracker);
752 /** Only sriov and roce lag should have tracker->tx_type set so
753 * no need to check the mode
754 */
755 blocking_notifier_call_chain(&dev0->priv.lag_nh,
756 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
757 ndev);
758 dev_put(ndev);
759 }
760 }
761
mlx5_lag_set_port_sel_mode(struct mlx5_lag * ldev,enum mlx5_lag_mode mode,unsigned long * flags)762 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
763 enum mlx5_lag_mode mode,
764 unsigned long *flags)
765 {
766 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
767 struct mlx5_core_dev *dev0;
768
769 if (first_idx < 0)
770 return -EINVAL;
771
772 if (mode == MLX5_LAG_MODE_MPESW ||
773 mode == MLX5_LAG_MODE_MULTIPATH)
774 return 0;
775
776 dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
777
778 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
779 if (ldev->ports > 2)
780 return -EINVAL;
781 return 0;
782 }
783
784 if (ldev->ports > 2)
785 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
786
787 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
788
789 return 0;
790 }
791
/* Compute the mode flags for a LAG about to be activated.
 *
 * @flags is reset, then the shared-FDB and native FDB selection flags are
 * set as required by @shared_fdb and @mode, and finally the port selection
 * mode is resolved by mlx5_lag_set_port_sel_mode(), whose result is
 * returned.
 */
static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
			      struct lag_tracker *tracker, bool shared_fdb,
			      unsigned long *flags)
{
	*flags = 0;

	if (shared_fdb)
		set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);

	/* Both shared FDB and MPESW use native FDB selection. */
	if (shared_fdb || mode == MLX5_LAG_MODE_MPESW)
		set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);

	return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
}
807
mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode,unsigned long flags)808 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
809 {
810 int port_sel_mode = get_port_sel_mode(mode, flags);
811
812 switch (port_sel_mode) {
813 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
814 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
815 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
816 default: return "invalid";
817 }
818 }
819
mlx5_lag_create_single_fdb(struct mlx5_lag * ldev)820 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
821 {
822 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
823 struct mlx5_eswitch *master_esw;
824 struct mlx5_core_dev *dev0;
825 int i, j;
826 int err;
827
828 if (master_idx < 0)
829 return -EINVAL;
830
831 dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
832 master_esw = dev0->priv.eswitch;
833 mlx5_ldev_for_each(i, 0, ldev) {
834 struct mlx5_eswitch *slave_esw;
835
836 if (i == master_idx)
837 continue;
838
839 slave_esw = mlx5_lag_pf(ldev, i)->dev->priv.eswitch;
840
841 err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
842 slave_esw, ldev->ports);
843 if (err)
844 goto err;
845 }
846 return 0;
847 err:
848 mlx5_ldev_for_each_reverse(j, i, 0, ldev) {
849 if (j == master_idx)
850 continue;
851 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
852 mlx5_lag_pf(ldev, j)->dev->priv.eswitch);
853 }
854 return err;
855 }
856
mlx5_create_lag(struct mlx5_lag * ldev,struct lag_tracker * tracker,enum mlx5_lag_mode mode,unsigned long flags)857 static int mlx5_create_lag(struct mlx5_lag *ldev,
858 struct lag_tracker *tracker,
859 enum mlx5_lag_mode mode,
860 unsigned long flags)
861 {
862 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
863 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
864 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
865 struct mlx5_core_dev *dev0;
866 int err;
867
868 if (first_idx < 0)
869 return -EINVAL;
870
871 dev0 = mlx5_lag_pf(ldev, first_idx)->dev;
872 if (tracker)
873 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
874 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
875 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
876
877 err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
878 if (err) {
879 mlx5_core_err(dev0,
880 "Failed to create LAG (%d)\n",
881 err);
882 return err;
883 }
884
885 if (shared_fdb) {
886 err = mlx5_lag_create_single_fdb(ldev);
887 if (err)
888 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
889 else
890 mlx5_core_info(dev0, "Operation mode is single FDB\n");
891 }
892
893 if (err) {
894 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
895 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
896 mlx5_core_err(dev0,
897 "Failed to deactivate RoCE LAG; driver restart required\n");
898 }
899 BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
900
901 return err;
902 }
903
/* Activate the LAG in @mode.
 *
 * Resolves the mode flags, computes the initial tx affinity mapping (all
 * modes except MPESW), creates the port selection tables in hash-based
 * mode and then creates the LAG itself. On success the drop rules are set
 * up for non-RoCE active-backup bonds and the new mode and flags are
 * stored in @ldev.
 *
 * Returns 0 on success or a negative errno; port selection tables created
 * here are destroyed again if LAG creation fails.
 */
int mlx5_activate_lag(struct mlx5_lag *ldev,
		      struct lag_tracker *tracker,
		      enum mlx5_lag_mode mode,
		      bool shared_fdb)
{
	bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
	struct mlx5_core_dev *dev0;
	unsigned long flags = 0;
	bool hash_based;
	int master_idx;
	int err;

	master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	if (master_idx < 0)
		return -EINVAL;

	dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
	err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
	if (err)
		return err;

	hash_based = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags);

	if (mode != MLX5_LAG_MODE_MPESW) {
		mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets,
					       ldev->v2p_map);
		if (hash_based) {
			err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
						       ldev->v2p_map);
			if (err) {
				mlx5_core_err(dev0,
					      "Failed to create LAG port selection(%d)\n",
					      err);
				return err;
			}
		}
	}

	err = mlx5_create_lag(ldev, tracker, mode, flags);
	if (err)
		goto err_create;

	if (!roce_lag && tracker &&
	    tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP)
		mlx5_lag_drop_rule_setup(ldev, tracker);

	ldev->mode = mode;
	ldev->mode_flags = flags;
	return 0;

err_create:
	if (hash_based)
		mlx5_lag_port_sel_destroy(ldev);
	if (roce_lag)
		mlx5_core_err(dev0,
			      "Failed to activate RoCE LAG\n");
	else
		mlx5_core_err(dev0,
			      "Failed to activate VF LAG\n"
			      "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
	return err;
}
960
mlx5_deactivate_lag(struct mlx5_lag * ldev)961 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
962 {
963 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
964 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
965 bool roce_lag = __mlx5_lag_is_roce(ldev);
966 unsigned long flags = ldev->mode_flags;
967 struct mlx5_eswitch *master_esw;
968 struct mlx5_core_dev *dev0;
969 int err;
970 int i;
971
972 if (master_idx < 0)
973 return -EINVAL;
974
975 dev0 = mlx5_lag_pf(ldev, master_idx)->dev;
976 master_esw = dev0->priv.eswitch;
977 ldev->mode = MLX5_LAG_MODE_NONE;
978 ldev->mode_flags = 0;
979 mlx5_lag_mp_reset(ldev);
980
981 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
982 mlx5_ldev_for_each(i, 0, ldev) {
983 if (i == master_idx)
984 continue;
985 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
986 mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
987 }
988 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
989 }
990
991 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
992 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
993 if (err) {
994 if (roce_lag) {
995 mlx5_core_err(dev0,
996 "Failed to deactivate RoCE LAG; driver restart required\n");
997 } else {
998 mlx5_core_err(dev0,
999 "Failed to deactivate VF LAG; driver restart required\n"
1000 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
1001 }
1002 return err;
1003 }
1004
1005 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
1006 mlx5_lag_port_sel_destroy(ldev);
1007 ldev->buckets = 1;
1008 }
1009 if (mlx5_lag_has_drop_rule(ldev))
1010 mlx5_lag_drop_rule_cleanup(ldev);
1011
1012 return 0;
1013 }
1014
mlx5_lag_check_prereq(struct mlx5_lag * ldev)1015 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
1016 {
1017 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1018 #ifdef CONFIG_MLX5_ESWITCH
1019 struct mlx5_core_dev *dev;
1020 u8 mode;
1021 #endif
1022 struct lag_func *pf;
1023 bool roce_support;
1024 int i;
1025
1026 if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
1027 return false;
1028
1029 #ifdef CONFIG_MLX5_ESWITCH
1030 mlx5_ldev_for_each(i, 0, ldev) {
1031 pf = mlx5_lag_pf(ldev, i);
1032 dev = pf->dev;
1033 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
1034 return false;
1035 }
1036
1037 pf = mlx5_lag_pf(ldev, master_idx);
1038 dev = pf->dev;
1039 mode = mlx5_eswitch_mode(dev);
1040 mlx5_ldev_for_each(i, 0, ldev) {
1041 pf = mlx5_lag_pf(ldev, i);
1042 if (mlx5_eswitch_mode(pf->dev) != mode)
1043 return false;
1044 }
1045
1046 #else
1047 mlx5_ldev_for_each(i, 0, ldev) {
1048 pf = mlx5_lag_pf(ldev, i);
1049 if (mlx5_sriov_is_enabled(pf->dev))
1050 return false;
1051 }
1052 #endif
1053 pf = mlx5_lag_pf(ldev, master_idx);
1054 roce_support = mlx5_get_roce_state(pf->dev);
1055 mlx5_ldev_for_each(i, 0, ldev) {
1056 if (i == master_idx)
1057 continue;
1058 pf = mlx5_lag_pf(ldev, i);
1059 if (mlx5_get_roce_state(pf->dev) != roce_support)
1060 return false;
1061 }
1062
1063 return true;
1064 }
1065
mlx5_lag_add_devices(struct mlx5_lag * ldev)1066 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
1067 {
1068 struct lag_func *pf;
1069 int i;
1070
1071 mlx5_ldev_for_each(i, 0, ldev) {
1072 pf = mlx5_lag_pf(ldev, i);
1073 if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1074 continue;
1075
1076 pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1077 mlx5_rescan_drivers_locked(pf->dev);
1078 }
1079 }
1080
mlx5_lag_remove_devices(struct mlx5_lag * ldev)1081 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
1082 {
1083 struct lag_func *pf;
1084 int i;
1085
1086 mlx5_ldev_for_each(i, 0, ldev) {
1087 pf = mlx5_lag_pf(ldev, i);
1088 if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
1089 continue;
1090
1091 pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1092 mlx5_rescan_drivers_locked(pf->dev);
1093 }
1094 }
1095
mlx5_disable_lag(struct mlx5_lag * ldev)1096 void mlx5_disable_lag(struct mlx5_lag *ldev)
1097 {
1098 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1099 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1100 struct mlx5_core_dev *dev0;
1101 bool roce_lag;
1102 int err;
1103 int i;
1104
1105 if (idx < 0)
1106 return;
1107
1108 dev0 = mlx5_lag_pf(ldev, idx)->dev;
1109 roce_lag = __mlx5_lag_is_roce(ldev);
1110
1111 if (shared_fdb) {
1112 mlx5_lag_remove_devices(ldev);
1113 } else if (roce_lag) {
1114 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
1115 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1116 mlx5_rescan_drivers_locked(dev0);
1117 }
1118 mlx5_ldev_for_each(i, 0, ldev) {
1119 if (i == idx)
1120 continue;
1121 mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev);
1122 }
1123 }
1124
1125 err = mlx5_deactivate_lag(ldev);
1126 if (err)
1127 return;
1128
1129 if (shared_fdb || roce_lag)
1130 mlx5_lag_add_devices(ldev);
1131
1132 if (shared_fdb)
1133 mlx5_ldev_for_each(i, 0, ldev)
1134 if (!(mlx5_lag_pf(ldev, i)->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
1135 mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
1136 }
1137
mlx5_lag_shared_fdb_supported(struct mlx5_lag * ldev)1138 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
1139 {
1140 struct mlx5_core_dev *dev;
1141 bool ret = false;
1142 int idx;
1143 int i;
1144
1145 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1146 if (idx < 0)
1147 return false;
1148
1149 mlx5_ldev_for_each(i, 0, ldev) {
1150 if (i == idx)
1151 continue;
1152 dev = mlx5_lag_pf(ldev, i)->dev;
1153 if (is_mdev_switchdev_mode(dev) &&
1154 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1155 MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
1156 MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
1157 mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
1158 MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1159 continue;
1160 return false;
1161 }
1162
1163 dev = mlx5_lag_pf(ldev, idx)->dev;
1164 if (is_mdev_switchdev_mode(dev) &&
1165 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
1166 mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
1167 MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
1168 mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
1169 ret = true;
1170
1171 return ret;
1172 }
1173
mlx5_lag_is_roce_lag(struct mlx5_lag * ldev)1174 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
1175 {
1176 bool roce_lag = true;
1177 struct lag_func *pf;
1178 int i;
1179
1180 mlx5_ldev_for_each(i, 0, ldev) {
1181 pf = mlx5_lag_pf(ldev, i);
1182 roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev);
1183 }
1184
1185 #ifdef CONFIG_MLX5_ESWITCH
1186 mlx5_ldev_for_each(i, 0, ldev) {
1187 pf = mlx5_lag_pf(ldev, i);
1188 roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev);
1189 }
1190 #endif
1191
1192 return roce_lag;
1193 }
1194
/* An already active, non-MPESW LAG is modified when bonding is still wanted. */
static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
{
	if (!do_bond)
		return false;
	if (!__mlx5_lag_is_active(ldev))
		return false;

	return ldev->mode != MLX5_LAG_MODE_MPESW;
}
1200
/* An active, non-MPESW LAG is disabled when bonding is no longer wanted. */
static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
{
	if (do_bond)
		return false;
	if (!__mlx5_lag_is_active(ldev))
		return false;

	return ldev->mode != MLX5_LAG_MODE_MPESW;
}
1206
1207 #ifdef CONFIG_MLX5_ESWITCH
1208 static int
mlx5_lag_sum_devices_speed(struct mlx5_lag * ldev,u32 * sum_speed,int (* get_speed)(struct mlx5_core_dev *,u32 *))1209 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed,
1210 int (*get_speed)(struct mlx5_core_dev *, u32 *))
1211 {
1212 struct mlx5_core_dev *pf_mdev;
1213 struct lag_func *pf;
1214 int pf_idx;
1215 u32 speed;
1216 int ret;
1217
1218 *sum_speed = 0;
1219 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1220 pf = mlx5_lag_pf(ldev, pf_idx);
1221 if (!pf)
1222 continue;
1223 pf_mdev = pf->dev;
1224 if (!pf_mdev)
1225 continue;
1226
1227 ret = get_speed(pf_mdev, &speed);
1228 if (ret) {
1229 mlx5_core_dbg(pf_mdev,
1230 "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n",
1231 get_speed, dev_name(pf_mdev->device),
1232 ret);
1233 return ret;
1234 }
1235
1236 *sum_speed += speed;
1237 }
1238
1239 return 0;
1240 }
1241
/* Sum the values reported by mlx5_port_max_linkspeed() over all devices. */
static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
{
	int err;

	err = mlx5_lag_sum_devices_speed(ldev, max_speed,
					 mlx5_port_max_linkspeed);
	return err;
}
1247
/* Sum the values reported by mlx5_port_oper_linkspeed() over all devices. */
static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
					   u32 *oper_speed)
{
	int err;

	err = mlx5_lag_sum_devices_speed(ldev, oper_speed,
					 mlx5_port_oper_linkspeed);
	return err;
}
1254
/* Record @speed as the aggregated max TX speed of every non-uplink vport of
 * @mdev and program it for the vports that are enabled.
 *
 * Does nothing when the device has no eswitch or lacks the
 * esw_vport_state_max_tx_speed capability. Programming failures are logged at
 * debug level and do not stop the iteration.
 */
static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
						u32 speed)
{
	struct mlx5_eswitch *esw = mdev->priv.eswitch;
	struct mlx5_vport *vp;
	unsigned long idx;
	int err;

	if (!esw || !MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed))
		return;

	mlx5_esw_for_each_vport(esw, idx, vp) {
		if (!vp || vp->vport == MLX5_VPORT_UPLINK)
			continue;

		/* Remembered even for disabled vports. */
		vp->agg_max_tx_speed = speed;

		if (!vp->enabled)
			continue;

		err = mlx5_modify_vport_max_tx_speed(mdev,
						     MLX5_VPORT_STATE_OP_MOD_ESW_VPORT,
						     vp->vport, true, speed);
		if (!err)
			continue;

		mlx5_core_dbg(mdev,
			      "Failed to set vport %d speed %d, err=%d\n",
			      vp->vport, speed, err);
	}
}
1290
mlx5_lag_set_vports_agg_speed(struct mlx5_lag * ldev)1291 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
1292 {
1293 struct mlx5_core_dev *mdev;
1294 struct lag_func *pf;
1295 u32 speed;
1296 int pf_idx;
1297
1298 if (ldev->mode == MLX5_LAG_MODE_MPESW) {
1299 if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
1300 return;
1301 } else {
1302 speed = ldev->tracker.bond_speed_mbps;
1303 if (speed == SPEED_UNKNOWN)
1304 return;
1305 }
1306
1307 /* If speed is not set, use the sum of max speeds of all PFs */
1308 if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
1309 return;
1310
1311 speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1312
1313 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1314 pf = mlx5_lag_pf(ldev, pf_idx);
1315 if (!pf)
1316 continue;
1317 mdev = pf->dev;
1318 if (!mdev)
1319 continue;
1320
1321 mlx5_lag_modify_device_vports_speed(mdev, speed);
1322 }
1323 }
1324
mlx5_lag_reset_vports_speed(struct mlx5_lag * ldev)1325 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev)
1326 {
1327 struct mlx5_core_dev *mdev;
1328 struct lag_func *pf;
1329 u32 speed;
1330 int pf_idx;
1331 int ret;
1332
1333 mlx5_ldev_for_each(pf_idx, 0, ldev) {
1334 pf = mlx5_lag_pf(ldev, pf_idx);
1335 if (!pf)
1336 continue;
1337 mdev = pf->dev;
1338 if (!mdev)
1339 continue;
1340
1341 ret = mlx5_port_oper_linkspeed(mdev, &speed);
1342 if (ret) {
1343 mlx5_core_dbg(mdev,
1344 "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n",
1345 dev_name(mdev->device), ret);
1346 continue;
1347 }
1348
1349 speed = speed / MLX5_MAX_TX_SPEED_UNIT;
1350 mlx5_lag_modify_device_vports_speed(mdev, speed);
1351 }
1352 }
1353 #endif
1354
/* Bring the hardware LAG state of @ldev in line with the tracked bond state.
 *
 * Depending on ldev->tracker and the current LAG state this activates a new
 * LAG (RoCE or SR-IOV mode, optionally with shared FDB), modifies an active
 * one, or disables it. Nothing is done if the first port (MLX5_LAG_P1) cannot
 * be resolved, or if the LAG is ready but the first port is in multipath mode.
 *
 * The caller visible in this file, mlx5_do_bond_work(), invokes this with the
 * devcom component lock and ldev->lock held.
 */
static void mlx5_do_bond(struct mlx5_lag *ldev)
{
	int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
	struct lag_tracker tracker = { };
	struct mlx5_core_dev *dev0;
	struct net_device *ndev;
	bool do_bond, roce_lag;
	int err;
	int i;

	if (idx < 0)
		return;

	dev0 = mlx5_lag_pf(ldev, idx)->dev;
	if (!mlx5_lag_is_ready(ldev)) {
		/* Not ready: never bond; an active LAG may be disabled below. */
		do_bond = false;
	} else {
		/* VF LAG is in multipath mode, ignore bond change requests */
		if (mlx5_lag_is_multipath(dev0))
			return;

		/* Work on a local snapshot of the tracker. */
		tracker = ldev->tracker;

		do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
	}

	if (do_bond && !__mlx5_lag_is_active(ldev)) {
		/* Case 1: bonding requested and no LAG active yet -> activate. */
		bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);

		roce_lag = mlx5_lag_is_roce_lag(ldev);

		/* IB auxiliary devices are removed before activation. */
		if (shared_fdb || roce_lag)
			mlx5_lag_remove_devices(ldev);

		err = mlx5_activate_lag(ldev, &tracker,
					roce_lag ? MLX5_LAG_MODE_ROCE :
						   MLX5_LAG_MODE_SRIOV,
					shared_fdb);
		if (err) {
			/* Activation failed: undo the device removal above. */
			if (shared_fdb || roce_lag)
				mlx5_lag_add_devices(ldev);
			if (shared_fdb) {
				mlx5_ldev_for_each(i, 0, ldev)
					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
			}

			return;
		}

		if (roce_lag) {
			struct mlx5_core_dev *dev;

			/* Only the first port gets its IB device back; the
			 * other ports just have RoCE enabled on their NIC
			 * vport when their RoCE state is on.
			 */
			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);
			mlx5_ldev_for_each(i, 0, ldev) {
				if (i == idx)
					continue;
				dev = mlx5_lag_pf(ldev, i)->dev;
				if (mlx5_get_roce_state(dev))
					mlx5_nic_vport_enable_roce(dev);
			}
		} else if (shared_fdb) {
			/* NOTE(review): this declaration shadows the outer 'i'. */
			int i;

			dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
			mlx5_rescan_drivers_locked(dev0);

			/* Stop at the first device whose reps fail to reload. */
			mlx5_ldev_for_each(i, 0, ldev) {
				err = mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
				if (err)
					break;
			}

			if (err) {
				/* Roll back: hide the first port's IB device
				 * again, deactivate the LAG, then restore the
				 * per-device IB devices and representors.
				 */
				dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
				mlx5_rescan_drivers_locked(dev0);
				mlx5_deactivate_lag(ldev);
				mlx5_lag_add_devices(ldev);
				mlx5_ldev_for_each(i, 0, ldev)
					mlx5_eswitch_reload_ib_reps(mlx5_lag_pf(ldev, i)->dev->priv.eswitch);
				mlx5_core_err(dev0, "Failed to enable lag\n");
				return;
			}
		}
		if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
			ndev = mlx5_lag_active_backup_get_netdev(dev0);
			/* Only sriov and roce lag should have tracker->tx_type
			 * set so no need to check the mode
			 */
			blocking_notifier_call_chain(&dev0->priv.lag_nh,
						     MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
						     ndev);
			/* Drop the reference on ndev. */
			dev_put(ndev);
		}
		mlx5_lag_set_vports_agg_speed(ldev);
	} else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
		/* Case 2: LAG already active and still wanted -> update it. */
		mlx5_modify_lag(ldev, &tracker);
		mlx5_lag_set_vports_agg_speed(ldev);
	} else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
		/* Case 3: LAG active but no longer wanted -> tear it down. */
		mlx5_lag_reset_vports_speed(ldev);
		mlx5_disable_lag(ldev);
	}
}
1458
1459 /* The last mdev to unregister will destroy the workqueue before removing the
1460 * devcom component, and as all the mdevs use the same devcom component we are
1461 * guaranteed that the devcom is valid while the calling work is running.
1462 */
mlx5_lag_get_devcom_comp(struct mlx5_lag * ldev)1463 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1464 {
1465 struct mlx5_devcom_comp_dev *devcom = NULL;
1466 struct lag_func *pf;
1467 int i;
1468
1469 mutex_lock(&ldev->lock);
1470 i = mlx5_get_next_ldev_func(ldev, 0);
1471 if (i < MLX5_MAX_PORTS) {
1472 pf = mlx5_lag_pf(ldev, i);
1473 devcom = pf->dev->priv.hca_devcom_comp;
1474 }
1475 mutex_unlock(&ldev->lock);
1476 return devcom;
1477 }
1478
mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev * dev,struct mlx5_flow_table_attr * ft_attr,struct mlx5_lag * ldev)1479 static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev,
1480 struct mlx5_flow_table_attr *ft_attr,
1481 struct mlx5_lag *ldev)
1482 {
1483 #ifdef CONFIG_MLX5_ESWITCH
1484 struct mlx5_flow_namespace *ns;
1485 struct mlx5_flow_group *fg;
1486 int err;
1487
1488 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1489 if (!ns)
1490 return 0;
1491
1492 ldev->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr);
1493 if (IS_ERR(ldev->lag_demux_ft))
1494 return PTR_ERR(ldev->lag_demux_ft);
1495
1496 fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch,
1497 ldev->lag_demux_ft);
1498 if (IS_ERR(fg)) {
1499 err = PTR_ERR(fg);
1500 mlx5_destroy_flow_table(ldev->lag_demux_ft);
1501 ldev->lag_demux_ft = NULL;
1502 return err;
1503 }
1504
1505 ldev->lag_demux_fg = fg;
1506 return 0;
1507 #else
1508 return -EOPNOTSUPP;
1509 #endif
1510 }
1511
mlx5_lag_demux_fw_init(struct mlx5_core_dev * dev,struct mlx5_flow_table_attr * ft_attr,struct mlx5_lag * ldev)1512 static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev,
1513 struct mlx5_flow_table_attr *ft_attr,
1514 struct mlx5_lag *ldev)
1515 {
1516 struct mlx5_flow_namespace *ns;
1517 int err;
1518
1519 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG);
1520 if (!ns)
1521 return 0;
1522
1523 ldev->lag_demux_fg = NULL;
1524 ft_attr->max_fte = 1;
1525 ldev->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr);
1526 if (IS_ERR(ldev->lag_demux_ft)) {
1527 err = PTR_ERR(ldev->lag_demux_ft);
1528 ldev->lag_demux_ft = NULL;
1529 return err;
1530 }
1531
1532 return 0;
1533 }
1534
mlx5_lag_demux_init(struct mlx5_core_dev * dev,struct mlx5_flow_table_attr * ft_attr)1535 int mlx5_lag_demux_init(struct mlx5_core_dev *dev,
1536 struct mlx5_flow_table_attr *ft_attr)
1537 {
1538 struct mlx5_lag *ldev;
1539
1540 if (!ft_attr)
1541 return -EINVAL;
1542
1543 ldev = mlx5_lag_dev(dev);
1544 if (!ldev)
1545 return -ENODEV;
1546
1547 xa_init(&ldev->lag_demux_rules);
1548
1549 if (mlx5_get_sd(dev))
1550 return mlx5_lag_demux_ft_fg_init(dev, ft_attr, ldev);
1551
1552 return mlx5_lag_demux_fw_init(dev, ft_attr, ldev);
1553 }
1554 EXPORT_SYMBOL(mlx5_lag_demux_init);
1555
mlx5_lag_demux_cleanup(struct mlx5_core_dev * dev)1556 void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev)
1557 {
1558 struct mlx5_flow_handle *rule;
1559 struct mlx5_lag *ldev;
1560 unsigned long vport_num;
1561
1562 ldev = mlx5_lag_dev(dev);
1563 if (!ldev)
1564 return;
1565
1566 xa_for_each(&ldev->lag_demux_rules, vport_num, rule)
1567 mlx5_del_flow_rules(rule);
1568 xa_destroy(&ldev->lag_demux_rules);
1569
1570 if (ldev->lag_demux_fg)
1571 mlx5_destroy_flow_group(ldev->lag_demux_fg);
1572 if (ldev->lag_demux_ft)
1573 mlx5_destroy_flow_table(ldev->lag_demux_ft);
1574 ldev->lag_demux_fg = NULL;
1575 ldev->lag_demux_ft = NULL;
1576 }
1577 EXPORT_SYMBOL(mlx5_lag_demux_cleanup);
1578
/* Create a LAG demux rule for @vport_num and store it under @index.
 *
 * Returns 0 without doing anything when @vport_dev has no LAG device, when no
 * demux flow group exists, or when a rule is already stored at @index.
 * Otherwise returns 0 on success or a negative errno if the rule cannot be
 * created or stored (a rule that cannot be stored is deleted again).
 */
int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num,
			    int index)
{
	struct mlx5_lag *ldev = mlx5_lag_dev(vport_dev);
	struct mlx5_flow_handle *handle;
	int rc;

	if (!ldev)
		return 0;
	if (!ldev->lag_demux_fg)
		return 0;

	/* A rule already exists for this index. */
	if (xa_load(&ldev->lag_demux_rules, index))
		return 0;

	handle = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch,
						vport_num, ldev->lag_demux_ft);
	if (IS_ERR(handle)) {
		rc = PTR_ERR(handle);
		mlx5_core_warn(vport_dev,
			       "Failed to create LAG demux rule for vport %u, err %d\n",
			       vport_num, rc);
		return rc;
	}

	rc = xa_err(xa_store(&ldev->lag_demux_rules, index, handle,
			     GFP_KERNEL));
	if (!rc)
		return 0;

	mlx5_del_flow_rules(handle);
	mlx5_core_warn(vport_dev,
		       "Failed to store LAG demux rule for vport %u, err %d\n",
		       vport_num, rc);

	return rc;
}
1614 EXPORT_SYMBOL(mlx5_lag_demux_rule_add);
1615
mlx5_lag_demux_rule_del(struct mlx5_core_dev * dev,int index)1616 void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index)
1617 {
1618 struct mlx5_flow_handle *rule;
1619 struct mlx5_lag *ldev;
1620
1621 ldev = mlx5_lag_dev(dev);
1622 if (!ldev || !ldev->lag_demux_fg)
1623 return;
1624
1625 rule = xa_erase(&ldev->lag_demux_rules, index);
1626 if (rule)
1627 mlx5_del_flow_rules(rule);
1628 }
1629 EXPORT_SYMBOL(mlx5_lag_demux_rule_del);
1630
mlx5_queue_bond_work(struct mlx5_lag * ldev,unsigned long delay)1631 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1632 {
1633 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1634 }
1635
mlx5_do_bond_work(struct work_struct * work)1636 static void mlx5_do_bond_work(struct work_struct *work)
1637 {
1638 struct delayed_work *delayed_work = to_delayed_work(work);
1639 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1640 bond_work);
1641 struct mlx5_devcom_comp_dev *devcom;
1642 int status;
1643
1644 devcom = mlx5_lag_get_devcom_comp(ldev);
1645 if (!devcom)
1646 return;
1647
1648 status = mlx5_devcom_comp_trylock(devcom);
1649 if (!status) {
1650 mlx5_queue_bond_work(ldev, HZ);
1651 return;
1652 }
1653
1654 mutex_lock(&ldev->lock);
1655 if (ldev->mode_changes_in_progress) {
1656 mutex_unlock(&ldev->lock);
1657 mlx5_devcom_comp_unlock(devcom);
1658 mlx5_queue_bond_work(ldev, HZ);
1659 return;
1660 }
1661
1662 mlx5_do_bond(ldev);
1663 mutex_unlock(&ldev->lock);
1664 mlx5_devcom_comp_unlock(devcom);
1665 }
1666
/* Handle a NETDEV_CHANGEUPPER notification for @ldev.
 *
 * Walks the slaves of the upper (bond) device, works out whether this LAG
 * device's netdevs - and only they - are enslaved to it, and updates
 * @tracker accordingly.
 *
 * Returns 1 if tracker->is_bonded changed, 0 otherwise. Note that
 * tracker->tx_type, tracker->hash_type and tracker->has_inactive may be
 * updated even when 0 is returned.
 */
static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
					 struct lag_tracker *tracker,
					 struct netdev_notifier_changeupper_info *info)
{
	struct net_device *upper = info->upper_dev, *ndev_tmp;
	struct netdev_lag_upper_info *lag_upper_info = NULL;
	bool is_bonded, is_in_lag, mode_supported;
	bool has_inactive = 0;
	struct lag_func *pf;
	struct slave *slave;
	u8 bond_status = 0;
	int num_slaves = 0;
	int changed = 0;
	int i, idx = -1;

	if (!netif_is_lag_master(upper))
		return 0;

	/* upper_info is only consumed for linking events. */
	if (info->linking)
		lag_upper_info = info->upper_info;

	/* The event may still be of interest if the slave does not belong to
	 * us, but is enslaved to a master which has one or more of our netdevs
	 * as slaves (e.g., if a new slave is added to a master that bonds two
	 * of our netdevs, we should unbond).
	 */
	rcu_read_lock();
	for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
		/* idx counts how many of our netdevs have been matched so far
		 * (in bond iteration order); it is not the pf index.
		 */
		mlx5_ldev_for_each(i, 0, ldev) {
			pf = mlx5_lag_pf(ldev, i);
			if (pf->netdev == ndev_tmp) {
				idx++;
				break;
			}
		}
		/* i < MLX5_MAX_PORTS means the inner loop found a match. */
		if (i < MLX5_MAX_PORTS) {
			slave = bond_slave_get_rcu(ndev_tmp);
			if (slave)
				has_inactive |= bond_is_slave_inactive(slave);
			bond_status |= (1 << idx);
		}

		/* Counts every slave of the bond, ours or not. */
		num_slaves++;
	}
	rcu_read_unlock();

	/* None of this lagdev's netdevs are slaves of this master. */
	if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
		return 0;

	if (lag_upper_info) {
		tracker->tx_type = lag_upper_info->tx_type;
		tracker->hash_type = lag_upper_info->hash_type;
	}

	tracker->has_inactive = has_inactive;
	/* Determine bonding status:
	 * A device is considered bonded if all ldev->ports of its netdevs are
	 * slaves of the same lag master, and only them.
	 */
	is_in_lag = num_slaves == ldev->ports &&
		bond_status == GENMASK(ldev->ports - 1, 0);

	/* Lag mode must be activebackup or hash. */
	mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
			 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;

	is_bonded = is_in_lag && mode_supported;
	if (tracker->is_bonded != is_bonded) {
		tracker->is_bonded = is_bonded;
		changed = 1;
	}

	if (!is_in_lag)
		return changed;

	/* Only report the reason through extack; the return value is
	 * unaffected by these checks.
	 */
	if (!mlx5_lag_is_ready(ldev))
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, PF is configured with more than 64 VFs");
	else if (!mode_supported)
		NL_SET_ERR_MSG_MOD(info->info.extack,
				   "Can't activate LAG offload, TX type isn't supported");

	return changed;
}
1752
mlx5_handle_changelowerstate_event(struct mlx5_lag * ldev,struct lag_tracker * tracker,struct net_device * ndev,struct netdev_notifier_changelowerstate_info * info)1753 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1754 struct lag_tracker *tracker,
1755 struct net_device *ndev,
1756 struct netdev_notifier_changelowerstate_info *info)
1757 {
1758 struct netdev_lag_lower_state_info *lag_lower_info;
1759 int idx;
1760
1761 if (!netif_is_lag_port(ndev))
1762 return 0;
1763
1764 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1765 if (idx < 0)
1766 return 0;
1767
1768 /* This information is used to determine virtual to physical
1769 * port mapping.
1770 */
1771 lag_lower_info = info->lower_state_info;
1772 if (!lag_lower_info)
1773 return 0;
1774
1775 tracker->netdev_state[idx] = *lag_lower_info;
1776
1777 return 1;
1778 }
1779
mlx5_handle_changeinfodata_event(struct mlx5_lag * ldev,struct lag_tracker * tracker,struct net_device * ndev)1780 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1781 struct lag_tracker *tracker,
1782 struct net_device *ndev)
1783 {
1784 struct net_device *ndev_tmp;
1785 struct slave *slave;
1786 bool has_inactive = 0;
1787 int idx;
1788
1789 if (!netif_is_lag_master(ndev))
1790 return 0;
1791
1792 rcu_read_lock();
1793 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1794 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1795 if (idx < 0)
1796 continue;
1797
1798 slave = bond_slave_get_rcu(ndev_tmp);
1799 if (slave)
1800 has_inactive |= bond_is_slave_inactive(slave);
1801 }
1802 rcu_read_unlock();
1803
1804 if (tracker->has_inactive == has_inactive)
1805 return 0;
1806
1807 tracker->has_inactive = has_inactive;
1808
1809 return 1;
1810 }
1811
mlx5_lag_update_tracker_speed(struct lag_tracker * tracker,struct net_device * ndev)1812 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker,
1813 struct net_device *ndev)
1814 {
1815 struct ethtool_link_ksettings lksettings;
1816 struct net_device *bond_dev;
1817 int err;
1818
1819 if (netif_is_lag_master(ndev))
1820 bond_dev = ndev;
1821 else
1822 bond_dev = netdev_master_upper_dev_get(ndev);
1823
1824 if (!bond_dev) {
1825 tracker->bond_speed_mbps = SPEED_UNKNOWN;
1826 return;
1827 }
1828
1829 err = __ethtool_get_link_ksettings(bond_dev, &lksettings);
1830 if (err) {
1831 netdev_dbg(bond_dev,
1832 "Failed to get speed for bond dev %s, err=%d\n",
1833 bond_dev->name, err);
1834 tracker->bond_speed_mbps = SPEED_UNKNOWN;
1835 return;
1836 }
1837
1838 if (lksettings.base.speed == SPEED_UNKNOWN)
1839 tracker->bond_speed_mbps = 0;
1840 else
1841 tracker->bond_speed_mbps = lksettings.base.speed;
1842 }
1843
1844 /* Returns speed in Mbps. */
mlx5_lag_query_bond_speed(struct mlx5_core_dev * mdev,u32 * speed)1845 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed)
1846 {
1847 struct mlx5_lag *ldev;
1848 unsigned long flags;
1849 int ret = 0;
1850
1851 spin_lock_irqsave(&lag_lock, flags);
1852 ldev = mlx5_lag_dev(mdev);
1853 if (!ldev) {
1854 ret = -ENODEV;
1855 goto unlock;
1856 }
1857
1858 *speed = ldev->tracker.bond_speed_mbps;
1859
1860 if (*speed == SPEED_UNKNOWN) {
1861 mlx5_core_dbg(mdev, "Bond speed is unknown\n");
1862 ret = -EINVAL;
1863 }
1864
1865 unlock:
1866 spin_unlock_irqrestore(&lag_lock, flags);
1867 return ret;
1868 }
1869 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed);
1870
1871 /* this handler is always registered to netdev events */
mlx5_lag_netdev_event(struct notifier_block * this,unsigned long event,void * ptr)1872 static int mlx5_lag_netdev_event(struct notifier_block *this,
1873 unsigned long event, void *ptr)
1874 {
1875 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1876 struct lag_tracker tracker;
1877 struct mlx5_lag *ldev;
1878 int changed = 0;
1879
1880 if (event != NETDEV_CHANGEUPPER &&
1881 event != NETDEV_CHANGELOWERSTATE &&
1882 event != NETDEV_CHANGEINFODATA)
1883 return NOTIFY_DONE;
1884
1885 ldev = container_of(this, struct mlx5_lag, nb);
1886
1887 tracker = ldev->tracker;
1888
1889 switch (event) {
1890 case NETDEV_CHANGEUPPER:
1891 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1892 break;
1893 case NETDEV_CHANGELOWERSTATE:
1894 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1895 ndev, ptr);
1896 break;
1897 case NETDEV_CHANGEINFODATA:
1898 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1899 break;
1900 }
1901
1902 if (changed)
1903 mlx5_lag_update_tracker_speed(&tracker, ndev);
1904
1905 ldev->tracker = tracker;
1906
1907 if (changed)
1908 mlx5_queue_bond_work(ldev, 0);
1909
1910 return NOTIFY_DONE;
1911 }
1912
mlx5_ldev_add_netdev(struct mlx5_lag * ldev,struct mlx5_core_dev * dev,struct net_device * netdev)1913 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1914 struct mlx5_core_dev *dev,
1915 struct net_device *netdev)
1916 {
1917 struct lag_func *pf;
1918 unsigned long flags;
1919 int i;
1920
1921 spin_lock_irqsave(&lag_lock, flags);
1922 /* Find pf entry by matching dev pointer */
1923 mlx5_ldev_for_each(i, 0, ldev) {
1924 pf = mlx5_lag_pf(ldev, i);
1925 if (pf->dev == dev) {
1926 pf->netdev = netdev;
1927 ldev->tracker.netdev_state[i].link_up = 0;
1928 ldev->tracker.netdev_state[i].tx_enabled = 0;
1929 break;
1930 }
1931 }
1932 spin_unlock_irqrestore(&lag_lock, flags);
1933 }
1934
mlx5_ldev_remove_netdev(struct mlx5_lag * ldev,struct net_device * netdev)1935 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1936 struct net_device *netdev)
1937 {
1938 struct lag_func *pf;
1939 unsigned long flags;
1940 int i;
1941
1942 spin_lock_irqsave(&lag_lock, flags);
1943 mlx5_ldev_for_each(i, 0, ldev) {
1944 pf = mlx5_lag_pf(ldev, i);
1945 if (pf->netdev == netdev) {
1946 pf->netdev = NULL;
1947 break;
1948 }
1949 }
1950 spin_unlock_irqrestore(&lag_lock, flags);
1951 }
1952
mlx5_ldev_add_mdev(struct mlx5_lag * ldev,struct mlx5_core_dev * dev)1953 static int mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1954 struct mlx5_core_dev *dev)
1955 {
1956 struct lag_func *pf;
1957 u32 idx;
1958 int err;
1959
1960 pf = kzalloc_obj(*pf);
1961 if (!pf)
1962 return -ENOMEM;
1963
1964 err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1),
1965 GFP_KERNEL);
1966 if (err) {
1967 kfree(pf);
1968 return err;
1969 }
1970
1971 pf->idx = idx;
1972 pf->dev = dev;
1973 dev->priv.lag = ldev;
1974
1975 MLX5_NB_INIT(&pf->port_change_nb,
1976 mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
1977 mlx5_eq_notifier_register(dev, &pf->port_change_nb);
1978
1979 return 0;
1980 }
1981
mlx5_ldev_remove_mdev(struct mlx5_lag * ldev,struct mlx5_core_dev * dev)1982 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1983 struct mlx5_core_dev *dev)
1984 {
1985 struct lag_func *pf;
1986 int i;
1987
1988 mlx5_ldev_for_each(i, 0, ldev) {
1989 pf = mlx5_lag_pf(ldev, i);
1990 if (pf->dev == dev)
1991 break;
1992 }
1993 if (i >= MLX5_MAX_PORTS)
1994 return;
1995
1996 if (pf->port_change_nb.nb.notifier_call)
1997 mlx5_eq_notifier_unregister(dev, &pf->port_change_nb);
1998
1999 pf->dev = NULL;
2000 dev->priv.lag = NULL;
2001 xa_erase(&ldev->pfs, pf->idx);
2002 kfree(pf);
2003 }
2004
2005 /* Must be called with HCA devcom component lock held */
__mlx5_lag_dev_add_mdev(struct mlx5_core_dev * dev)2006 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
2007 {
2008 struct mlx5_devcom_comp_dev *pos = NULL;
2009 struct mlx5_lag *ldev = NULL;
2010 struct mlx5_core_dev *tmp_dev;
2011 int err;
2012
2013 tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
2014 if (tmp_dev)
2015 ldev = mlx5_lag_dev(tmp_dev);
2016
2017 if (!ldev) {
2018 ldev = mlx5_lag_dev_alloc(dev);
2019 if (!ldev) {
2020 mlx5_core_err(dev, "Failed to alloc lag dev\n");
2021 return 0;
2022 }
2023 err = mlx5_ldev_add_mdev(ldev, dev);
2024 if (err) {
2025 mlx5_core_err(dev, "Failed to add mdev to lag dev\n");
2026 mlx5_ldev_put(ldev);
2027 return 0;
2028 }
2029 return 0;
2030 }
2031
2032 mutex_lock(&ldev->lock);
2033 if (ldev->mode_changes_in_progress) {
2034 mutex_unlock(&ldev->lock);
2035 return -EAGAIN;
2036 }
2037 mlx5_ldev_get(ldev);
2038 err = mlx5_ldev_add_mdev(ldev, dev);
2039 if (err) {
2040 mlx5_ldev_put(ldev);
2041 mutex_unlock(&ldev->lock);
2042 return err;
2043 }
2044 mutex_unlock(&ldev->lock);
2045
2046 return 0;
2047 }
2048
mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev * dev)2049 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
2050 {
2051 mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
2052 dev->priv.hca_devcom_comp = NULL;
2053 }
2054
mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev * dev)2055 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
2056 {
2057 struct mlx5_devcom_match_attr attr = {
2058 .flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
2059 .net = mlx5_core_net(dev),
2060 };
2061 u8 len __always_unused;
2062
2063 mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
2064
2065 /* This component is use to sync adding core_dev to lag_dev and to sync
2066 * changes of mlx5_adev_devices between LAG layer and other layers.
2067 */
2068 dev->priv.hca_devcom_comp =
2069 mlx5_devcom_register_component(dev->priv.devc,
2070 MLX5_DEVCOM_HCA_PORTS,
2071 &attr, mlx5_lag_devcom_event,
2072 dev);
2073 if (!dev->priv.hca_devcom_comp) {
2074 mlx5_core_err(dev,
2075 "Failed to register devcom HCA component.");
2076 return -EINVAL;
2077 }
2078
2079 return 0;
2080 }
2081
mlx5_lag_remove_mdev(struct mlx5_core_dev * dev)2082 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
2083 {
2084 struct mlx5_lag *ldev;
2085
2086 ldev = mlx5_lag_dev(dev);
2087 if (!ldev)
2088 return;
2089
2090 /* mdev is being removed, might as well remove debugfs
2091 * as early as possible.
2092 */
2093 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
2094 recheck:
2095 mutex_lock(&ldev->lock);
2096 if (ldev->mode_changes_in_progress) {
2097 mutex_unlock(&ldev->lock);
2098 msleep(100);
2099 goto recheck;
2100 }
2101 mlx5_ldev_remove_mdev(ldev, dev);
2102 mutex_unlock(&ldev->lock);
2103 /* Send devcom event to notify peers that a device is being removed */
2104 mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2105 LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev);
2106 mlx5_lag_unregister_hca_devcom_comp(dev);
2107 mlx5_ldev_put(ldev);
2108 }
2109
mlx5_lag_add_mdev(struct mlx5_core_dev * dev)2110 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
2111 {
2112 int err;
2113
2114 if (!mlx5_lag_is_supported(dev))
2115 return;
2116
2117 if (mlx5_lag_register_hca_devcom_comp(dev))
2118 return;
2119
2120 recheck:
2121 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2122 err = __mlx5_lag_dev_add_mdev(dev);
2123 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2124
2125 if (err) {
2126 msleep(100);
2127 goto recheck;
2128 }
2129 /* Send devcom event to notify peers that a device was added */
2130 mlx5_devcom_send_event(dev->priv.hca_devcom_comp,
2131 LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev);
2132 mlx5_ldev_add_debugfs(dev);
2133 }
2134
mlx5_lag_remove_netdev(struct mlx5_core_dev * dev,struct net_device * netdev)2135 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
2136 struct net_device *netdev)
2137 {
2138 struct mlx5_lag *ldev;
2139 bool lag_is_active;
2140
2141 ldev = mlx5_lag_dev(dev);
2142 if (!ldev)
2143 return;
2144
2145 mutex_lock(&ldev->lock);
2146 mlx5_ldev_remove_netdev(ldev, netdev);
2147 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2148
2149 lag_is_active = __mlx5_lag_is_active(ldev);
2150 mutex_unlock(&ldev->lock);
2151
2152 if (lag_is_active)
2153 mlx5_queue_bond_work(ldev, 0);
2154 }
2155
mlx5_lag_add_netdev(struct mlx5_core_dev * dev,struct net_device * netdev)2156 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
2157 struct net_device *netdev)
2158 {
2159 struct mlx5_lag *ldev;
2160 int num = 0;
2161
2162 ldev = mlx5_lag_dev(dev);
2163 if (!ldev)
2164 return;
2165
2166 mutex_lock(&ldev->lock);
2167 mlx5_ldev_add_netdev(ldev, dev, netdev);
2168 num = mlx5_lag_num_netdevs(ldev);
2169 if (num >= ldev->ports)
2170 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
2171 mutex_unlock(&ldev->lock);
2172 mlx5_queue_bond_work(ldev, 0);
2173 }
2174
mlx5_get_pre_ldev_func(struct mlx5_lag * ldev,int start_idx,int end_idx)2175 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
2176 {
2177 struct lag_func *pf;
2178 int i;
2179
2180 for (i = start_idx; i >= end_idx; i--) {
2181 pf = xa_load(&ldev->pfs, i);
2182 if (pf && pf->dev)
2183 return i;
2184 }
2185 return -1;
2186 }
2187
mlx5_get_next_ldev_func(struct mlx5_lag * ldev,int start_idx)2188 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
2189 {
2190 struct lag_func *pf;
2191 unsigned long idx;
2192
2193 xa_for_each_start(&ldev->pfs, idx, pf, start_idx)
2194 if (pf->dev)
2195 return idx;
2196 return MLX5_MAX_PORTS;
2197 }
2198
mlx5_lag_is_roce(struct mlx5_core_dev * dev)2199 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
2200 {
2201 struct mlx5_lag *ldev;
2202 unsigned long flags;
2203 bool res;
2204
2205 spin_lock_irqsave(&lag_lock, flags);
2206 ldev = mlx5_lag_dev(dev);
2207 res = ldev && __mlx5_lag_is_roce(ldev);
2208 spin_unlock_irqrestore(&lag_lock, flags);
2209
2210 return res;
2211 }
2212 EXPORT_SYMBOL(mlx5_lag_is_roce);
2213
mlx5_lag_is_active(struct mlx5_core_dev * dev)2214 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
2215 {
2216 struct mlx5_lag *ldev;
2217 unsigned long flags;
2218 bool res;
2219
2220 spin_lock_irqsave(&lag_lock, flags);
2221 ldev = mlx5_lag_dev(dev);
2222 res = ldev && __mlx5_lag_is_active(ldev);
2223 spin_unlock_irqrestore(&lag_lock, flags);
2224
2225 return res;
2226 }
2227 EXPORT_SYMBOL(mlx5_lag_is_active);
2228
mlx5_lag_mode_is_hash(struct mlx5_core_dev * dev)2229 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
2230 {
2231 struct mlx5_lag *ldev;
2232 unsigned long flags;
2233 bool res = 0;
2234
2235 spin_lock_irqsave(&lag_lock, flags);
2236 ldev = mlx5_lag_dev(dev);
2237 if (ldev)
2238 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
2239 spin_unlock_irqrestore(&lag_lock, flags);
2240
2241 return res;
2242 }
2243 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
2244
mlx5_lag_is_master(struct mlx5_core_dev * dev)2245 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
2246 {
2247 struct mlx5_lag *ldev;
2248 unsigned long flags;
2249 struct lag_func *pf;
2250 bool res = false;
2251 int idx;
2252
2253 spin_lock_irqsave(&lag_lock, flags);
2254 ldev = mlx5_lag_dev(dev);
2255 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
2256 if (ldev && __mlx5_lag_is_active(ldev) && idx >= 0) {
2257 pf = mlx5_lag_pf(ldev, idx);
2258 res = pf && dev == pf->dev;
2259 }
2260 spin_unlock_irqrestore(&lag_lock, flags);
2261
2262 return res;
2263 }
2264 EXPORT_SYMBOL(mlx5_lag_is_master);
2265
mlx5_lag_is_sriov(struct mlx5_core_dev * dev)2266 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
2267 {
2268 struct mlx5_lag *ldev;
2269 unsigned long flags;
2270 bool res;
2271
2272 spin_lock_irqsave(&lag_lock, flags);
2273 ldev = mlx5_lag_dev(dev);
2274 res = ldev && __mlx5_lag_is_sriov(ldev);
2275 spin_unlock_irqrestore(&lag_lock, flags);
2276
2277 return res;
2278 }
2279 EXPORT_SYMBOL(mlx5_lag_is_sriov);
2280
mlx5_lag_is_shared_fdb(struct mlx5_core_dev * dev)2281 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
2282 {
2283 struct mlx5_lag *ldev;
2284 unsigned long flags;
2285 bool res;
2286
2287 spin_lock_irqsave(&lag_lock, flags);
2288 ldev = mlx5_lag_dev(dev);
2289 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
2290 spin_unlock_irqrestore(&lag_lock, flags);
2291
2292 return res;
2293 }
2294 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
2295
mlx5_lag_disable_change(struct mlx5_core_dev * dev)2296 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
2297 {
2298 struct mlx5_lag *ldev;
2299
2300 ldev = mlx5_lag_dev(dev);
2301 if (!ldev)
2302 return;
2303
2304 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
2305 mutex_lock(&ldev->lock);
2306
2307 ldev->mode_changes_in_progress++;
2308 if (__mlx5_lag_is_active(ldev)) {
2309 if (ldev->mode == MLX5_LAG_MODE_MPESW)
2310 mlx5_lag_disable_mpesw(ldev);
2311 else
2312 mlx5_disable_lag(ldev);
2313 }
2314
2315 mutex_unlock(&ldev->lock);
2316 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
2317 }
2318
mlx5_lag_enable_change(struct mlx5_core_dev * dev)2319 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
2320 {
2321 struct mlx5_lag *ldev;
2322
2323 ldev = mlx5_lag_dev(dev);
2324 if (!ldev)
2325 return;
2326
2327 mutex_lock(&ldev->lock);
2328 ldev->mode_changes_in_progress--;
2329 mutex_unlock(&ldev->lock);
2330 mlx5_queue_bond_work(ldev, 0);
2331 }
2332
mlx5_lag_get_slave_port(struct mlx5_core_dev * dev,struct net_device * slave)2333 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
2334 struct net_device *slave)
2335 {
2336 struct mlx5_lag *ldev;
2337 unsigned long flags;
2338 struct lag_func *pf;
2339 u8 port = 0;
2340 int i;
2341
2342 spin_lock_irqsave(&lag_lock, flags);
2343 ldev = mlx5_lag_dev(dev);
2344 if (!(ldev && __mlx5_lag_is_roce(ldev)))
2345 goto unlock;
2346
2347 mlx5_ldev_for_each(i, 0, ldev) {
2348 pf = mlx5_lag_pf(ldev, i);
2349 if (pf->netdev == slave) {
2350 port = i;
2351 break;
2352 }
2353 }
2354
2355 port = ldev->v2p_map[port * ldev->buckets];
2356
2357 unlock:
2358 spin_unlock_irqrestore(&lag_lock, flags);
2359 return port;
2360 }
2361 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
2362
mlx5_lag_get_num_ports(struct mlx5_core_dev * dev)2363 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
2364 {
2365 struct mlx5_lag *ldev;
2366
2367 ldev = mlx5_lag_dev(dev);
2368 if (!ldev)
2369 return 0;
2370
2371 return ldev->ports;
2372 }
2373 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
2374
mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev * dev,int * i)2375 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
2376 {
2377 struct mlx5_core_dev *peer_dev = NULL;
2378 struct mlx5_lag *ldev;
2379 unsigned long flags;
2380 struct lag_func *pf;
2381 int idx;
2382
2383 spin_lock_irqsave(&lag_lock, flags);
2384 ldev = mlx5_lag_dev(dev);
2385 if (!ldev)
2386 goto unlock;
2387
2388 if (*i == MLX5_MAX_PORTS)
2389 goto unlock;
2390 mlx5_ldev_for_each(idx, *i, ldev) {
2391 pf = mlx5_lag_pf(ldev, idx);
2392 if (pf->dev != dev)
2393 break;
2394 }
2395
2396 if (idx == MLX5_MAX_PORTS) {
2397 *i = idx;
2398 goto unlock;
2399 }
2400 *i = idx + 1;
2401
2402 pf = mlx5_lag_pf(ldev, idx);
2403 peer_dev = pf->dev;
2404
2405 unlock:
2406 spin_unlock_irqrestore(&lag_lock, flags);
2407 return peer_dev;
2408 }
2409 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
2410
/*
 * Query congestion statistics and accumulate selected counters.
 *
 * @dev:          device to query; when its LAG is active, the LAG's devices
 *                are queried instead
 * @values:       output array of @num_counters entries; zeroed first, then
 *                each queried device's counters are added to it
 * @num_counters: number of entries in @values and @offsets
 * @offsets:      byte offset of each counter inside the
 *                query_cong_statistics_out buffer; each is read as a
 *                big-endian 64-bit value
 *
 * Returns 0 on success, -ENOMEM if a scratch buffer cannot be allocated, or
 * the error of the first failing command. On a command failure @values
 * holds the sums of the devices queried before the failure.
 */
int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
				 u64 *values,
				 int num_counters,
				 size_t *offsets)
{
	int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
	struct mlx5_core_dev **mdev;
	int ret = 0, i, j, idx = 0;
	struct mlx5_lag *ldev;
	unsigned long flags;
	struct lag_func *pf;
	int num_ports;
	void *out;

	out = kvzalloc(outlen, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	/* Array allocation helper instead of open-coded size arithmetic. */
	mdev = kvcalloc(MLX5_MAX_PORTS, sizeof(*mdev), GFP_KERNEL);
	if (!mdev) {
		ret = -ENOMEM;
		goto free_out;
	}

	memset(values, 0, sizeof(*values) * num_counters);

	/*
	 * Snapshot the device pointers under lag_lock; the commands
	 * themselves are issued after the spinlock is dropped.
	 */
	spin_lock_irqsave(&lag_lock, flags);
	ldev = mlx5_lag_dev(dev);
	if (ldev && __mlx5_lag_is_active(ldev)) {
		mlx5_ldev_for_each(i, 0, ldev) {
			pf = mlx5_lag_pf(ldev, i);
			mdev[idx++] = pf->dev;
		}
		/*
		 * Never walk past the entries actually filled in above: a
		 * slot beyond idx is still NULL from the zeroed allocation.
		 */
		num_ports = min_t(int, ldev->ports, idx);
	} else {
		num_ports = 1;
		mdev[MLX5_LAG_P1] = dev;
	}
	spin_unlock_irqrestore(&lag_lock, flags);

	for (i = 0; i < num_ports; ++i) {
		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};

		MLX5_SET(query_cong_statistics_in, in, opcode,
			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
					  out);
		if (ret)
			goto free_mdev;

		for (j = 0; j < num_counters; ++j)
			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
	}

free_mdev:
	kvfree(mdev);
free_out:
	kvfree(out);
	return ret;
}
EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
2472