1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/devcom.h"
39 #include "mlx5_core.h"
40 #include "eswitch.h"
41 #include "esw/acl/ofld.h"
42 #include "lag.h"
43 #include "mp.h"
44 #include "mpesw.h"
45
46
47 /* General purpose, use for short periods of time.
48 * Beware of lock dependencies (preferably, no locks should be acquired
49 * under it).
50 */
51 static DEFINE_SPINLOCK(lag_lock);
52
53 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
54 {
55 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
56 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
57
58 if (mode == MLX5_LAG_MODE_MPESW)
59 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
60
61 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
62 }
63
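/* Build a bitmask of the ports that can currently carry traffic, as
 * inferred from the bond tracker state. The result is used to program
 * the active_port field of the LAG context.
 */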
64 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
65 {
66 u8 enabled_ports[MLX5_MAX_PORTS] = {};
67 u8 active_port = 0;
68 int num_enabled;
69 int idx;
70
71 mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
72 &num_enabled);
73 for (idx = 0; idx < num_enabled; idx++)
74 active_port |= BIT_MASK(enabled_ports[idx]);
75
76 return active_port;
77 }
78
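/* Issue the CREATE_LAG firmware command. Depending on the port
 * selection mode this programs either the per-port queue-affinity
 * mapping or, for hash (port selection FT) mode with the bypass
 * capability, the active-port bitmap.
 */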
79 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
80 int mode, unsigned long flags)
81 {
82 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
83 &flags);
84 int port_sel_mode = get_port_sel_mode(mode, flags);
85 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
86 u8 *ports = ldev->v2p_map;
87 int idx0, idx1;
88 void *lag_ctx;
89
90 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
91 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
92 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
93 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
94 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
95
96 if (idx0 < 0 || idx1 < 0)
97 return -EINVAL;
98
99 switch (port_sel_mode) {
100 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
101 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
102 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
103 break;
104 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
105 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
106 break;
107
108 MLX5_SET(lagc, lag_ctx, active_port,
109 lag_active_port_bits(mlx5_lag_dev(dev)));
110 break;
111 default:
112 break;
113 }
114 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
115
116 return mlx5_cmd_exec_in(dev, create_lag, in);
117 }
118
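/* Issue MODIFY_LAG to update the queue-affinity mapping of the first
 * two ports (field_select bit 0 selects the tx_remap_affinity fields).
 */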
119 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
120 u8 *ports)
121 {
122 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
123 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
124 int idx0, idx1;
125
126 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
127 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
128 if (idx0 < 0 || idx1 < 0)
129 return -EINVAL;
130
131 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
132 MLX5_SET(modify_lag_in, in, field_select, 0x1);
133
134 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
135 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
136
137 return mlx5_cmd_exec_in(dev, modify_lag, in);
138 }
139
140 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
141 {
142 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
143
144 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
145
146 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
147 }
148 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
149
150 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
151 {
152 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
153
154 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
155
156 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
157 }
158 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
159
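/* Collect the indices of ports that should not carry traffic: ports
 * that are TX-disabled or link-down according to the bond tracker.
 */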
160 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
161 u8 *ports, int *num_disabled)
162 {
163 int i;
164
165 *num_disabled = 0;
166 mlx5_ldev_for_each(i, 0, ldev)
167 if (!tracker->netdev_state[i].tx_enabled ||
168 !tracker->netdev_state[i].link_up)
169 ports[(*num_disabled)++] = i;
170 }
171
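/* Collect the indices of ports that can carry traffic (TX enabled and
 * link up). If no port qualifies, fall back to the disabled ports so
 * the caller always gets a non-empty set.
 */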
172 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
173 u8 *ports, int *num_enabled)
174 {
175 int i;
176
177 *num_enabled = 0;
178 mlx5_ldev_for_each(i, 0, ldev)
179 if (tracker->netdev_state[i].tx_enabled &&
180 tracker->netdev_state[i].link_up)
181 ports[(*num_enabled)++] = i;
182
183 if (*num_enabled == 0)
184 mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
185 }
186
187 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
188 struct mlx5_lag *ldev,
189 struct lag_tracker *tracker,
190 unsigned long flags)
191 {
192 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
193 u8 enabled_ports[MLX5_MAX_PORTS] = {};
194 int written = 0;
195 int num_enabled;
196 int idx;
197 int err;
198 int i;
199 int j;
200
201 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
202 mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
203 &num_enabled);
204 for (i = 0; i < num_enabled; i++) {
205 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
206 if (err != 3)
207 return;
208 written += err;
209 }
210 buf[written - 2] = 0;
211 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
212 } else {
213 mlx5_ldev_for_each(i, 0, ldev) {
214 for (j = 0; j < ldev->buckets; j++) {
215 idx = i * ldev->buckets + j;
216 err = scnprintf(buf + written, 10,
217 " port %d:%d", i + 1, ldev->v2p_map[idx]);
218 if (err != 9)
219 return;
220 written += err;
221 }
222 }
223 mlx5_core_info(dev, "lag map:%s\n", buf);
224 }
225 }
226
227 static int mlx5_lag_netdev_event(struct notifier_block *this,
228 unsigned long event, void *ptr);
229 static void mlx5_do_bond_work(struct work_struct *work);
230
231 static void mlx5_ldev_free(struct kref *ref)
232 {
233 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
234
235 if (ldev->nb.notifier_call)
236 unregister_netdevice_notifier_net(&init_net, &ldev->nb);
237 mlx5_lag_mp_cleanup(ldev);
238 cancel_delayed_work_sync(&ldev->bond_work);
239 destroy_workqueue(ldev->wq);
240 mutex_destroy(&ldev->lock);
241 kfree(ldev);
242 }
243
244 static void mlx5_ldev_put(struct mlx5_lag *ldev)
245 {
246 kref_put(&ldev->ref, mlx5_ldev_free);
247 }
248
249 static void mlx5_ldev_get(struct mlx5_lag *ldev)
250 {
251 kref_get(&ldev->ref);
252 }
253
254 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
255 {
256 struct mlx5_lag *ldev;
257 int err;
258
259 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
260 if (!ldev)
261 return NULL;
262
263 ldev->wq = create_singlethread_workqueue("mlx5_lag");
264 if (!ldev->wq) {
265 kfree(ldev);
266 return NULL;
267 }
268
269 kref_init(&ldev->ref);
270 mutex_init(&ldev->lock);
271 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
272
273 ldev->nb.notifier_call = mlx5_lag_netdev_event;
274 if (register_netdevice_notifier_net(&init_net, &ldev->nb)) {
275 ldev->nb.notifier_call = NULL;
276 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
277 }
278 ldev->mode = MLX5_LAG_MODE_NONE;
279
280 err = mlx5_lag_mp_init(ldev);
281 if (err)
282 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
283 err);
284
285 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
286 ldev->buckets = 1;
287
288 return ldev;
289 }
290
291 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
292 struct net_device *ndev)
293 {
294 int i;
295
296 mlx5_ldev_for_each(i, 0, ldev)
297 if (ldev->pf[i].netdev == ndev)
298 return i;
299
300 return -ENOENT;
301 }
302
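/* Translate a sequence number (position among the currently registered
 * devices) into a PF array index, or -ENOENT if out of range.
 */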
303 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
304 {
305 int i, num = 0;
306
307 if (!ldev)
308 return -ENOENT;
309
310 mlx5_ldev_for_each(i, 0, ldev) {
311 if (num == seq)
312 return i;
313 num++;
314 }
315 return -ENOENT;
316 }
317
318 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
319 {
320 int i, num = 0;
321
322 if (!ldev)
323 return 0;
324
325 mlx5_ldev_for_each(i, 0, ldev) {
326 (void)i;
327 num++;
328 }
329 return num;
330 }
331
332 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
333 {
334 int i, num = 0;
335
336 if (!ldev)
337 return 0;
338
339 mlx5_ldev_for_each(i, 0, ldev)
340 if (ldev->pf[i].netdev)
341 num++;
342 return num;
343 }
344
345 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
346 {
347 return ldev->mode == MLX5_LAG_MODE_ROCE;
348 }
349
350 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
351 {
352 return ldev->mode == MLX5_LAG_MODE_SRIOV;
353 }
354
355 /* Create a mapping between steering slots and active ports.
356 * As we have ldev->buckets slots per port, first assume the native
357 * mapping should be used.
358 * If there are disabled ports, fill their slots with a mapping that
359 * points to active ports.
360 */
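/* Illustrative example: with 2 ports and 1 bucket the native map is
 * [1, 2]; if port 2 goes down, its slot is remapped to a randomly
 * chosen active port, giving [1, 1].
 */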
361 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
362 struct mlx5_lag *ldev,
363 u8 buckets,
364 u8 *ports)
365 {
366 int disabled[MLX5_MAX_PORTS] = {};
367 int enabled[MLX5_MAX_PORTS] = {};
368 int disabled_ports_num = 0;
369 int enabled_ports_num = 0;
370 int idx;
371 u32 rand;
372 int i;
373 int j;
374
375 mlx5_ldev_for_each(i, 0, ldev) {
376 if (tracker->netdev_state[i].tx_enabled &&
377 tracker->netdev_state[i].link_up)
378 enabled[enabled_ports_num++] = i;
379 else
380 disabled[disabled_ports_num++] = i;
381 }
382
383 /* Use native mapping by default where each port's buckets
384 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
385 */
386 mlx5_ldev_for_each(i, 0, ldev) {
387 for (j = 0; j < buckets; j++) {
388 idx = i * buckets + j;
389 ports[idx] = i + 1;
390 }
391 }
392
393 /* If all ports are disabled/enabled keep native mapping */
394 if (enabled_ports_num == ldev->ports ||
395 disabled_ports_num == ldev->ports)
396 return;
397
398 /* Go over the disabled ports and for each assign a random active port */
399 for (i = 0; i < disabled_ports_num; i++) {
400 for (j = 0; j < buckets; j++) {
401 get_random_bytes(&rand, 4);
402 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
403 }
404 }
405 }
406
407 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
408 {
409 int i;
410
411 mlx5_ldev_for_each(i, 0, ldev)
412 if (ldev->pf[i].has_drop)
413 return true;
414 return false;
415 }
416
417 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
418 {
419 int i;
420
421 mlx5_ldev_for_each(i, 0, ldev) {
422 if (!ldev->pf[i].has_drop)
423 continue;
424
425 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
426 MLX5_VPORT_UPLINK);
427 ldev->pf[i].has_drop = false;
428 }
429 }
430
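/* Install ingress drop rules on the uplink vports of the inactive
 * ports so their RX traffic is dropped while the bond considers them
 * inactive; any previously installed rules are removed first.
 */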
431 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
432 struct lag_tracker *tracker)
433 {
434 u8 disabled_ports[MLX5_MAX_PORTS] = {};
435 struct mlx5_core_dev *dev;
436 int disabled_index;
437 int num_disabled;
438 int err;
439 int i;
440
441 /* First delete the current drop rule so there won't be any dropped
442 * packets
443 */
444 mlx5_lag_drop_rule_cleanup(ldev);
445
446 if (!ldev->tracker.has_inactive)
447 return;
448
449 mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
450
451 for (i = 0; i < num_disabled; i++) {
452 disabled_index = disabled_ports[i];
453 dev = ldev->pf[disabled_index].dev;
454 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
455 MLX5_VPORT_UPLINK);
456 if (!err)
457 ldev->pf[disabled_index].has_drop = true;
458 else
459 mlx5_core_err(dev,
460 "Failed to create lag drop rule, error: %d", err);
461 }
462 }
463
464 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
465 {
466 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
467 void *lag_ctx;
468
469 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
470
471 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
472 MLX5_SET(modify_lag_in, in, field_select, 0x2);
473
474 MLX5_SET(lagc, lag_ctx, active_port, ports);
475
476 return mlx5_cmd_exec_in(dev, modify_lag, in);
477 }
478
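/* Apply a new port affinity mapping. For hash-based LAG this updates
 * the port selection flow table (and the firmware active-port bitmap
 * when the bypass capability exists); otherwise it falls back to a
 * MODIFY_LAG queue-affinity update.
 */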
479 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
480 {
481 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
482 struct mlx5_core_dev *dev0;
483 u8 active_ports;
484 int ret;
485
486 if (idx < 0)
487 return -EINVAL;
488
489 dev0 = ldev->pf[idx].dev;
490 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
491 ret = mlx5_lag_port_sel_modify(ldev, ports);
492 if (ret ||
493 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
494 return ret;
495
496 active_ports = lag_active_port_bits(ldev);
497
498 return mlx5_cmd_modify_active_port(dev0, active_ports);
499 }
500 return mlx5_cmd_modify_lag(dev0, ldev, ports);
501 }
502
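/* Return (with a reference held) the netdev of the currently active
 * port in active-backup mode, falling back to the last registered port
 * when the tracker reports no TX-enabled port.
 */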
503 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
504 {
505 struct net_device *ndev = NULL;
506 struct mlx5_lag *ldev;
507 unsigned long flags;
508 int i, last_idx;
509
510 spin_lock_irqsave(&lag_lock, flags);
511 ldev = mlx5_lag_dev(dev);
512
513 if (!ldev)
514 goto unlock;
515
516 mlx5_ldev_for_each(i, 0, ldev)
517 if (ldev->tracker.netdev_state[i].tx_enabled)
518 ndev = ldev->pf[i].netdev;
519 if (!ndev) {
520 last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
521 if (last_idx < 0)
522 goto unlock;
523 ndev = ldev->pf[last_idx].netdev;
524 }
525
526 if (ndev)
527 dev_hold(ndev);
528
529 unlock:
530 spin_unlock_irqrestore(&lag_lock, flags);
531
532 return ndev;
533 }
534
535 void mlx5_modify_lag(struct mlx5_lag *ldev,
536 struct lag_tracker *tracker)
537 {
538 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
539 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
540 struct mlx5_core_dev *dev0;
541 int idx;
542 int err;
543 int i;
544 int j;
545
546 if (first_idx < 0)
547 return;
548
549 dev0 = ldev->pf[first_idx].dev;
550 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
551
552 mlx5_ldev_for_each(i, 0, ldev) {
553 for (j = 0; j < ldev->buckets; j++) {
554 idx = i * ldev->buckets + j;
555 if (ports[idx] == ldev->v2p_map[idx])
556 continue;
557 err = _mlx5_modify_lag(ldev, ports);
558 if (err) {
559 mlx5_core_err(dev0,
560 "Failed to modify LAG (%d)\n",
561 err);
562 return;
563 }
564 memcpy(ldev->v2p_map, ports, sizeof(ports));
565
566 mlx5_lag_print_mapping(dev0, ldev, tracker,
567 ldev->mode_flags);
568 break;
569 }
570 }
571
572 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
573 struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
574
575 if (ldev->mode != MLX5_LAG_MODE_ROCE)
576 mlx5_lag_drop_rule_setup(ldev, tracker);
577 /* Only sriov and roce lag should have tracker->tx_type set so
578 * no need to check the mode
579 */
580 blocking_notifier_call_chain(&dev0->priv.lag_nh,
581 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
582 ndev);
583 dev_put(ndev);
584 }
585 }
586
587 static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev,
588 unsigned long *flags)
589 {
590 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
591 struct mlx5_core_dev *dev0;
592
593 if (first_idx < 0)
594 return -EINVAL;
595
596 dev0 = ldev->pf[first_idx].dev;
597 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
598 if (ldev->ports > 2)
599 return -EINVAL;
600 return 0;
601 }
602
603 if (ldev->ports > 2)
604 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
605
606 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
607
608 return 0;
609 }
610
611 static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev,
612 struct lag_tracker *tracker,
613 enum mlx5_lag_mode mode,
614 unsigned long *flags)
615 {
616 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
617 struct lag_func *dev0;
618
619 if (first_idx < 0 || mode == MLX5_LAG_MODE_MPESW)
620 return;
621
622 dev0 = &ldev->pf[first_idx];
623 if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) &&
624 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) {
625 if (ldev->ports > 2)
626 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
627 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
628 }
629 }
630
631 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
632 struct lag_tracker *tracker, bool shared_fdb,
633 unsigned long *flags)
634 {
635 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
636
637 *flags = 0;
638 if (shared_fdb) {
639 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
640 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
641 }
642
643 if (mode == MLX5_LAG_MODE_MPESW)
644 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
645
646 if (roce_lag)
647 return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
648
649 mlx5_lag_set_port_sel_mode_offloads(ldev, tracker, mode, flags);
650 return 0;
651 }
652
653 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
654 {
655 int port_sel_mode = get_port_sel_mode(mode, flags);
656
657 switch (port_sel_mode) {
658 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
659 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
660 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
661 default: return "invalid";
662 }
663 }
664
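/* Pair every slave eswitch with the master eswitch so all ports share
 * a single FDB; on failure, unwind the slaves added so far.
 */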
665 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
666 {
667 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
668 struct mlx5_eswitch *master_esw;
669 struct mlx5_core_dev *dev0;
670 int i, j;
671 int err;
672
673 if (first_idx < 0)
674 return -EINVAL;
675
676 dev0 = ldev->pf[first_idx].dev;
677 master_esw = dev0->priv.eswitch;
678 mlx5_ldev_for_each(i, first_idx + 1, ldev) {
679 struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
680
681 err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
682 slave_esw, ldev->ports);
683 if (err)
684 goto err;
685 }
686 return 0;
687 err:
688 mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
689 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
690 ldev->pf[j].dev->priv.eswitch);
691 return err;
692 }
693
694 static int mlx5_create_lag(struct mlx5_lag *ldev,
695 struct lag_tracker *tracker,
696 enum mlx5_lag_mode mode,
697 unsigned long flags)
698 {
699 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
700 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
701 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
702 struct mlx5_core_dev *dev0;
703 int err;
704
705 if (first_idx < 0)
706 return -EINVAL;
707
708 dev0 = ldev->pf[first_idx].dev;
709 if (tracker)
710 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
711 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
712 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
713
714 err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
715 if (err) {
716 mlx5_core_err(dev0,
717 "Failed to create LAG (%d)\n",
718 err);
719 return err;
720 }
721
722 if (shared_fdb) {
723 err = mlx5_lag_create_single_fdb(ldev);
724 if (err)
725 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
726 else
727 mlx5_core_info(dev0, "Operation mode is single FDB\n");
728 }
729
730 if (err) {
731 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
732 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
733 mlx5_core_err(dev0,
734 "Failed to deactivate RoCE LAG; driver restart required\n");
735 }
736 BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
737
738 return err;
739 }
740
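/* Activate hardware LAG in the requested mode: compute the mode flags,
 * build the TX affinity mapping (and the port selection flow table for
 * hash mode), create the LAG object in firmware and, for active-backup
 * bonds that are not RoCE LAG, install drop rules on inactive ports.
 */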
741 int mlx5_activate_lag(struct mlx5_lag *ldev,
742 struct lag_tracker *tracker,
743 enum mlx5_lag_mode mode,
744 bool shared_fdb)
745 {
746 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
747 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
748 struct mlx5_core_dev *dev0;
749 unsigned long flags = 0;
750 int err;
751
752 if (first_idx < 0)
753 return -EINVAL;
754
755 dev0 = ldev->pf[first_idx].dev;
756 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
757 if (err)
758 return err;
759
760 if (mode != MLX5_LAG_MODE_MPESW) {
761 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
762 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
763 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
764 ldev->v2p_map);
765 if (err) {
766 mlx5_core_err(dev0,
767 "Failed to create LAG port selection(%d)\n",
768 err);
769 return err;
770 }
771 }
772 }
773
774 err = mlx5_create_lag(ldev, tracker, mode, flags);
775 if (err) {
776 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
777 mlx5_lag_port_sel_destroy(ldev);
778 if (roce_lag)
779 mlx5_core_err(dev0,
780 "Failed to activate RoCE LAG\n");
781 else
782 mlx5_core_err(dev0,
783 "Failed to activate VF LAG\n"
784 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
785 return err;
786 }
787
788 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
789 !roce_lag)
790 mlx5_lag_drop_rule_setup(ldev, tracker);
791
792 ldev->mode = mode;
793 ldev->mode_flags = flags;
794 return 0;
795 }
796
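/* Tear down hardware LAG: detach the slave eswitches from the shared
 * FDB if one was used, destroy the LAG object in firmware and release
 * the port selection flow table and any drop rules.
 */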
797 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
798 {
799 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
800 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
801 bool roce_lag = __mlx5_lag_is_roce(ldev);
802 unsigned long flags = ldev->mode_flags;
803 struct mlx5_eswitch *master_esw;
804 struct mlx5_core_dev *dev0;
805 int err;
806 int i;
807
808 if (first_idx < 0)
809 return -EINVAL;
810
811 dev0 = ldev->pf[first_idx].dev;
812 master_esw = dev0->priv.eswitch;
813 ldev->mode = MLX5_LAG_MODE_NONE;
814 ldev->mode_flags = 0;
815 mlx5_lag_mp_reset(ldev);
816
817 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
818 mlx5_ldev_for_each(i, first_idx + 1, ldev)
819 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
820 ldev->pf[i].dev->priv.eswitch);
821 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
822 }
823
824 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
825 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
826 if (err) {
827 if (roce_lag) {
828 mlx5_core_err(dev0,
829 "Failed to deactivate RoCE LAG; driver restart required\n");
830 } else {
831 mlx5_core_err(dev0,
832 "Failed to deactivate VF LAG; driver restart required\n"
833 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
834 }
835 return err;
836 }
837
838 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
839 mlx5_lag_port_sel_destroy(ldev);
840 ldev->buckets = 1;
841 }
842 if (mlx5_lag_has_drop_rule(ldev))
843 mlx5_lag_drop_rule_cleanup(ldev);
844
845 return 0;
846 }
847
848 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
849 {
850 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
851 #ifdef CONFIG_MLX5_ESWITCH
852 struct mlx5_core_dev *dev;
853 u8 mode;
854 #endif
855 bool roce_support;
856 int i;
857
858 if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
859 return false;
860
861 #ifdef CONFIG_MLX5_ESWITCH
862 mlx5_ldev_for_each(i, 0, ldev) {
863 dev = ldev->pf[i].dev;
864 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
865 return false;
866 }
867
868 dev = ldev->pf[first_idx].dev;
869 mode = mlx5_eswitch_mode(dev);
870 mlx5_ldev_for_each(i, 0, ldev)
871 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
872 return false;
873
874 #else
875 mlx5_ldev_for_each(i, 0, ldev)
876 if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
877 return false;
878 #endif
879 roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
880 mlx5_ldev_for_each(i, first_idx + 1, ldev)
881 if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
882 return false;
883
884 return true;
885 }
886
887 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
888 {
889 int i;
890
891 mlx5_ldev_for_each(i, 0, ldev) {
892 if (ldev->pf[i].dev->priv.flags &
893 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
894 continue;
895
896 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
897 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
898 }
899 }
900
901 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
902 {
903 int i;
904
905 mlx5_ldev_for_each(i, 0, ldev) {
906 if (ldev->pf[i].dev->priv.flags &
907 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
908 continue;
909
910 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
911 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
912 }
913 }
914
915 void mlx5_disable_lag(struct mlx5_lag *ldev)
916 {
917 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
918 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
919 struct mlx5_core_dev *dev0;
920 bool roce_lag;
921 int err;
922 int i;
923
924 if (idx < 0)
925 return;
926
927 dev0 = ldev->pf[idx].dev;
928 roce_lag = __mlx5_lag_is_roce(ldev);
929
930 if (shared_fdb) {
931 mlx5_lag_remove_devices(ldev);
932 } else if (roce_lag) {
933 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
934 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
935 mlx5_rescan_drivers_locked(dev0);
936 }
937 mlx5_ldev_for_each(i, idx + 1, ldev)
938 mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
939 }
940
941 err = mlx5_deactivate_lag(ldev);
942 if (err)
943 return;
944
945 if (shared_fdb || roce_lag)
946 mlx5_lag_add_devices(ldev);
947
948 if (shared_fdb)
949 mlx5_ldev_for_each(i, 0, ldev)
950 if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
951 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
952 }
953
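/* A shared FDB can be used only when every device is in switchdev mode
 * with vport metadata matching enabled, the relevant firmware
 * capabilities are present and the devcom component is ready with all
 * peers visible.
 */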
954 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
955 {
956 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
957 struct mlx5_core_dev *dev;
958 int i;
959
960 if (idx < 0)
961 return false;
962
963 mlx5_ldev_for_each(i, idx + 1, ldev) {
964 dev = ldev->pf[i].dev;
965 if (is_mdev_switchdev_mode(dev) &&
966 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
967 MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
968 MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
969 mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
970 MLX5_CAP_GEN(dev, num_lag_ports) - 1)
971 continue;
972 return false;
973 }
974
975 dev = ldev->pf[idx].dev;
976 if (is_mdev_switchdev_mode(dev) &&
977 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
978 mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
979 MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
980 mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
981 return true;
982
983 return false;
984 }
985
986 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
987 {
988 bool roce_lag = true;
989 int i;
990
991 mlx5_ldev_for_each(i, 0, ldev)
992 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
993
994 #ifdef CONFIG_MLX5_ESWITCH
995 mlx5_ldev_for_each(i, 0, ldev)
996 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
997 #endif
998
999 return roce_lag;
1000 }
1001
1002 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
1003 {
1004 return do_bond && __mlx5_lag_is_active(ldev) &&
1005 ldev->mode != MLX5_LAG_MODE_MPESW;
1006 }
1007
1008 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
1009 {
1010 return !do_bond && __mlx5_lag_is_active(ldev) &&
1011 ldev->mode != MLX5_LAG_MODE_MPESW;
1012 }
1013
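/* Decide, based on the tracker state, whether LAG should be activated,
 * modified or disabled, and perform the transition. Runs from the
 * delayed bond work with the ldev lock and the devcom component lock
 * held.
 */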
1014 static void mlx5_do_bond(struct mlx5_lag *ldev)
1015 {
1016 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1017 struct lag_tracker tracker = { };
1018 struct mlx5_core_dev *dev0;
1019 struct net_device *ndev;
1020 bool do_bond, roce_lag;
1021 int err;
1022 int i;
1023
1024 if (idx < 0)
1025 return;
1026
1027 dev0 = ldev->pf[idx].dev;
1028 if (!mlx5_lag_is_ready(ldev)) {
1029 do_bond = false;
1030 } else {
1031 /* VF LAG is in multipath mode, ignore bond change requests */
1032 if (mlx5_lag_is_multipath(dev0))
1033 return;
1034
1035 tracker = ldev->tracker;
1036
1037 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1038 }
1039
1040 if (do_bond && !__mlx5_lag_is_active(ldev)) {
1041 bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1042
1043 roce_lag = mlx5_lag_is_roce_lag(ldev);
1044
1045 if (shared_fdb || roce_lag)
1046 mlx5_lag_remove_devices(ldev);
1047
1048 err = mlx5_activate_lag(ldev, &tracker,
1049 roce_lag ? MLX5_LAG_MODE_ROCE :
1050 MLX5_LAG_MODE_SRIOV,
1051 shared_fdb);
1052 if (err) {
1053 if (shared_fdb || roce_lag)
1054 mlx5_lag_add_devices(ldev);
1055
1056 return;
1057 } else if (roce_lag) {
1058 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1059 mlx5_rescan_drivers_locked(dev0);
1060 mlx5_ldev_for_each(i, idx + 1, ldev) {
1061 if (mlx5_get_roce_state(ldev->pf[i].dev))
1062 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
1063 }
1064 } else if (shared_fdb) {
1065 int i;
1066
1067 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1068 mlx5_rescan_drivers_locked(dev0);
1069
1070 mlx5_ldev_for_each(i, 0, ldev) {
1071 err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1072 if (err)
1073 break;
1074 }
1075
1076 if (err) {
1077 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1078 mlx5_rescan_drivers_locked(dev0);
1079 mlx5_deactivate_lag(ldev);
1080 mlx5_lag_add_devices(ldev);
1081 mlx5_ldev_for_each(i, 0, ldev)
1082 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1083 mlx5_core_err(dev0, "Failed to enable lag\n");
1084 return;
1085 }
1086 }
1087 if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1088 ndev = mlx5_lag_active_backup_get_netdev(dev0);
1089 /* Only sriov and roce lag should have tracker->tx_type
1090 * set so no need to check the mode
1091 */
1092 blocking_notifier_call_chain(&dev0->priv.lag_nh,
1093 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1094 ndev);
1095 dev_put(ndev);
1096 }
1097 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1098 mlx5_modify_lag(ldev, &tracker);
1099 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1100 mlx5_disable_lag(ldev);
1101 }
1102 }
1103
1104 /* The last mdev to unregister will destroy the workqueue before removing the
1105 * devcom component, and as all the mdevs use the same devcom component we are
1106 * guaranteed that the devcom is valid while the calling work is running.
1107 */
1108 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1109 {
1110 struct mlx5_devcom_comp_dev *devcom = NULL;
1111 int i;
1112
1113 mutex_lock(&ldev->lock);
1114 i = mlx5_get_next_ldev_func(ldev, 0);
1115 if (i < MLX5_MAX_PORTS)
1116 devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
1117 mutex_unlock(&ldev->lock);
1118 return devcom;
1119 }
1120
1121 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1122 {
1123 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1124 }
1125
1126 static void mlx5_do_bond_work(struct work_struct *work)
1127 {
1128 struct delayed_work *delayed_work = to_delayed_work(work);
1129 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1130 bond_work);
1131 struct mlx5_devcom_comp_dev *devcom;
1132 int status;
1133
1134 devcom = mlx5_lag_get_devcom_comp(ldev);
1135 if (!devcom)
1136 return;
1137
1138 status = mlx5_devcom_comp_trylock(devcom);
1139 if (!status) {
1140 mlx5_queue_bond_work(ldev, HZ);
1141 return;
1142 }
1143
1144 mutex_lock(&ldev->lock);
1145 if (ldev->mode_changes_in_progress) {
1146 mutex_unlock(&ldev->lock);
1147 mlx5_devcom_comp_unlock(devcom);
1148 mlx5_queue_bond_work(ldev, HZ);
1149 return;
1150 }
1151
1152 mlx5_do_bond(ldev);
1153 mutex_unlock(&ldev->lock);
1154 mlx5_devcom_comp_unlock(devcom);
1155 }
1156
1157 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1158 struct lag_tracker *tracker,
1159 struct netdev_notifier_changeupper_info *info)
1160 {
1161 struct net_device *upper = info->upper_dev, *ndev_tmp;
1162 struct netdev_lag_upper_info *lag_upper_info = NULL;
1163 bool is_bonded, is_in_lag, mode_supported;
1164 bool has_inactive = false;
1165 struct slave *slave;
1166 u8 bond_status = 0;
1167 int num_slaves = 0;
1168 int changed = 0;
1169 int i, idx = -1;
1170
1171 if (!netif_is_lag_master(upper))
1172 return 0;
1173
1174 if (info->linking)
1175 lag_upper_info = info->upper_info;
1176
1177 /* The event may still be of interest if the slave does not belong to
1178 * us, but is enslaved to a master which has one or more of our netdevs
1179 * as slaves (e.g., if a new slave is added to a master that bonds two
1180 * of our netdevs, we should unbond).
1181 */
1182 rcu_read_lock();
1183 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1184 mlx5_ldev_for_each(i, 0, ldev) {
1185 if (ldev->pf[i].netdev == ndev_tmp) {
1186 idx++;
1187 break;
1188 }
1189 }
1190 if (i < MLX5_MAX_PORTS) {
1191 slave = bond_slave_get_rcu(ndev_tmp);
1192 if (slave)
1193 has_inactive |= bond_is_slave_inactive(slave);
1194 bond_status |= (1 << idx);
1195 }
1196
1197 num_slaves++;
1198 }
1199 rcu_read_unlock();
1200
1201 /* None of this lagdev's netdevs are slaves of this master. */
1202 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1203 return 0;
1204
1205 if (lag_upper_info) {
1206 tracker->tx_type = lag_upper_info->tx_type;
1207 tracker->hash_type = lag_upper_info->hash_type;
1208 }
1209
1210 tracker->has_inactive = has_inactive;
1211 /* Determine bonding status:
1212 * A device is considered bonded if all of its physical ports are
1213 * slaves of the same lag master, and only them.
1214 */
1215 is_in_lag = num_slaves == ldev->ports &&
1216 bond_status == GENMASK(ldev->ports - 1, 0);
1217
1218 /* Lag mode must be activebackup or hash. */
1219 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1220 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1221
1222 is_bonded = is_in_lag && mode_supported;
1223 if (tracker->is_bonded != is_bonded) {
1224 tracker->is_bonded = is_bonded;
1225 changed = 1;
1226 }
1227
1228 if (!is_in_lag)
1229 return changed;
1230
1231 if (!mlx5_lag_is_ready(ldev))
1232 NL_SET_ERR_MSG_MOD(info->info.extack,
1233 "Can't activate LAG offload, PF is configured with more than 64 VFs");
1234 else if (!mode_supported)
1235 NL_SET_ERR_MSG_MOD(info->info.extack,
1236 "Can't activate LAG offload, TX type isn't supported");
1237
1238 return changed;
1239 }
1240
1241 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1242 struct lag_tracker *tracker,
1243 struct net_device *ndev,
1244 struct netdev_notifier_changelowerstate_info *info)
1245 {
1246 struct netdev_lag_lower_state_info *lag_lower_info;
1247 int idx;
1248
1249 if (!netif_is_lag_port(ndev))
1250 return 0;
1251
1252 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1253 if (idx < 0)
1254 return 0;
1255
1256 /* This information is used to determine virtual to physical
1257 * port mapping.
1258 */
1259 lag_lower_info = info->lower_state_info;
1260 if (!lag_lower_info)
1261 return 0;
1262
1263 tracker->netdev_state[idx] = *lag_lower_info;
1264
1265 return 1;
1266 }
1267
1268 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1269 struct lag_tracker *tracker,
1270 struct net_device *ndev)
1271 {
1272 struct net_device *ndev_tmp;
1273 struct slave *slave;
1274 bool has_inactive = false;
1275 int idx;
1276
1277 if (!netif_is_lag_master(ndev))
1278 return 0;
1279
1280 rcu_read_lock();
1281 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1282 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1283 if (idx < 0)
1284 continue;
1285
1286 slave = bond_slave_get_rcu(ndev_tmp);
1287 if (slave)
1288 has_inactive |= bond_is_slave_inactive(slave);
1289 }
1290 rcu_read_unlock();
1291
1292 if (tracker->has_inactive == has_inactive)
1293 return 0;
1294
1295 tracker->has_inactive = has_inactive;
1296
1297 return 1;
1298 }
1299
1300 /* this handler is always registered to netdev events */
1301 static int mlx5_lag_netdev_event(struct notifier_block *this,
1302 unsigned long event, void *ptr)
1303 {
1304 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1305 struct lag_tracker tracker;
1306 struct mlx5_lag *ldev;
1307 int changed = 0;
1308
1309 if (event != NETDEV_CHANGEUPPER &&
1310 event != NETDEV_CHANGELOWERSTATE &&
1311 event != NETDEV_CHANGEINFODATA)
1312 return NOTIFY_DONE;
1313
1314 ldev = container_of(this, struct mlx5_lag, nb);
1315
1316 tracker = ldev->tracker;
1317
1318 switch (event) {
1319 case NETDEV_CHANGEUPPER:
1320 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1321 break;
1322 case NETDEV_CHANGELOWERSTATE:
1323 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1324 ndev, ptr);
1325 break;
1326 case NETDEV_CHANGEINFODATA:
1327 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1328 break;
1329 }
1330
1331 ldev->tracker = tracker;
1332
1333 if (changed)
1334 mlx5_queue_bond_work(ldev, 0);
1335
1336 return NOTIFY_DONE;
1337 }
1338
1339 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1340 struct mlx5_core_dev *dev,
1341 struct net_device *netdev)
1342 {
1343 unsigned int fn = mlx5_get_dev_index(dev);
1344 unsigned long flags;
1345
1346 spin_lock_irqsave(&lag_lock, flags);
1347 ldev->pf[fn].netdev = netdev;
1348 ldev->tracker.netdev_state[fn].link_up = 0;
1349 ldev->tracker.netdev_state[fn].tx_enabled = 0;
1350 spin_unlock_irqrestore(&lag_lock, flags);
1351 }
1352
1353 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1354 struct net_device *netdev)
1355 {
1356 unsigned long flags;
1357 int i;
1358
1359 spin_lock_irqsave(&lag_lock, flags);
1360 mlx5_ldev_for_each(i, 0, ldev) {
1361 if (ldev->pf[i].netdev == netdev) {
1362 ldev->pf[i].netdev = NULL;
1363 break;
1364 }
1365 }
1366 spin_unlock_irqrestore(&lag_lock, flags);
1367 }
1368
1369 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1370 struct mlx5_core_dev *dev)
1371 {
1372 unsigned int fn = mlx5_get_dev_index(dev);
1373
1374 ldev->pf[fn].dev = dev;
1375 dev->priv.lag = ldev;
1376 }
1377
1378 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1379 struct mlx5_core_dev *dev)
1380 {
1381 int fn;
1382
1383 fn = mlx5_get_dev_index(dev);
1384 if (ldev->pf[fn].dev != dev)
1385 return;
1386
1387 ldev->pf[fn].dev = NULL;
1388 dev->priv.lag = NULL;
1389 }
1390
1391 /* Must be called with HCA devcom component lock held */
1392 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1393 {
1394 struct mlx5_devcom_comp_dev *pos = NULL;
1395 struct mlx5_lag *ldev = NULL;
1396 struct mlx5_core_dev *tmp_dev;
1397
1398 tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
1399 if (tmp_dev)
1400 ldev = mlx5_lag_dev(tmp_dev);
1401
1402 if (!ldev) {
1403 ldev = mlx5_lag_dev_alloc(dev);
1404 if (!ldev) {
1405 mlx5_core_err(dev, "Failed to alloc lag dev\n");
1406 return 0;
1407 }
1408 mlx5_ldev_add_mdev(ldev, dev);
1409 return 0;
1410 }
1411
1412 mutex_lock(&ldev->lock);
1413 if (ldev->mode_changes_in_progress) {
1414 mutex_unlock(&ldev->lock);
1415 return -EAGAIN;
1416 }
1417 mlx5_ldev_get(ldev);
1418 mlx5_ldev_add_mdev(ldev, dev);
1419 mutex_unlock(&ldev->lock);
1420
1421 return 0;
1422 }
1423
1424 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1425 {
1426 struct mlx5_lag *ldev;
1427
1428 ldev = mlx5_lag_dev(dev);
1429 if (!ldev)
1430 return;
1431
1432 /* mdev is being removed, might as well remove debugfs
1433 * as early as possible.
1434 */
1435 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1436 recheck:
1437 mutex_lock(&ldev->lock);
1438 if (ldev->mode_changes_in_progress) {
1439 mutex_unlock(&ldev->lock);
1440 msleep(100);
1441 goto recheck;
1442 }
1443 mlx5_ldev_remove_mdev(ldev, dev);
1444 mutex_unlock(&ldev->lock);
1445 mlx5_ldev_put(ldev);
1446 }
1447
1448 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1449 {
1450 int err;
1451
1452 if (!mlx5_lag_is_supported(dev))
1453 return;
1454
1455 if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
1456 return;
1457
1458 recheck:
1459 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1460 err = __mlx5_lag_dev_add_mdev(dev);
1461 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1462
1463 if (err) {
1464 msleep(100);
1465 goto recheck;
1466 }
1467 mlx5_ldev_add_debugfs(dev);
1468 }
1469
1470 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1471 struct net_device *netdev)
1472 {
1473 struct mlx5_lag *ldev;
1474 bool lag_is_active;
1475
1476 ldev = mlx5_lag_dev(dev);
1477 if (!ldev)
1478 return;
1479
1480 mutex_lock(&ldev->lock);
1481 mlx5_ldev_remove_netdev(ldev, netdev);
1482 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1483
1484 lag_is_active = __mlx5_lag_is_active(ldev);
1485 mutex_unlock(&ldev->lock);
1486
1487 if (lag_is_active)
1488 mlx5_queue_bond_work(ldev, 0);
1489 }
1490
1491 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1492 struct net_device *netdev)
1493 {
1494 struct mlx5_lag *ldev;
1495 int num = 0;
1496
1497 ldev = mlx5_lag_dev(dev);
1498 if (!ldev)
1499 return;
1500
1501 mutex_lock(&ldev->lock);
1502 mlx5_ldev_add_netdev(ldev, dev, netdev);
1503 num = mlx5_lag_num_netdevs(ldev);
1504 if (num >= ldev->ports)
1505 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1506 mutex_unlock(&ldev->lock);
1507 mlx5_queue_bond_work(ldev, 0);
1508 }
1509
1510 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
1511 {
1512 int i;
1513
1514 for (i = start_idx; i >= end_idx; i--)
1515 if (ldev->pf[i].dev)
1516 return i;
1517 return -1;
1518 }
1519
1520 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
1521 {
1522 int i;
1523
1524 for (i = start_idx; i < MLX5_MAX_PORTS; i++)
1525 if (ldev->pf[i].dev)
1526 return i;
1527 return MLX5_MAX_PORTS;
1528 }
1529
1530 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1531 {
1532 struct mlx5_lag *ldev;
1533 unsigned long flags;
1534 bool res;
1535
1536 spin_lock_irqsave(&lag_lock, flags);
1537 ldev = mlx5_lag_dev(dev);
1538 res = ldev && __mlx5_lag_is_roce(ldev);
1539 spin_unlock_irqrestore(&lag_lock, flags);
1540
1541 return res;
1542 }
1543 EXPORT_SYMBOL(mlx5_lag_is_roce);
1544
1545 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1546 {
1547 struct mlx5_lag *ldev;
1548 unsigned long flags;
1549 bool res;
1550
1551 spin_lock_irqsave(&lag_lock, flags);
1552 ldev = mlx5_lag_dev(dev);
1553 res = ldev && __mlx5_lag_is_active(ldev);
1554 spin_unlock_irqrestore(&lag_lock, flags);
1555
1556 return res;
1557 }
1558 EXPORT_SYMBOL(mlx5_lag_is_active);
1559
1560 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1561 {
1562 struct mlx5_lag *ldev;
1563 unsigned long flags;
1564 bool res = false;
1565
1566 spin_lock_irqsave(&lag_lock, flags);
1567 ldev = mlx5_lag_dev(dev);
1568 if (ldev)
1569 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1570 spin_unlock_irqrestore(&lag_lock, flags);
1571
1572 return res;
1573 }
1574 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1575
1576 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1577 {
1578 struct mlx5_lag *ldev;
1579 unsigned long flags;
1580 bool res = false;
1581 int idx;
1582
1583 spin_lock_irqsave(&lag_lock, flags);
1584 ldev = mlx5_lag_dev(dev);
1585 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1586 res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
1587 spin_unlock_irqrestore(&lag_lock, flags);
1588
1589 return res;
1590 }
1591 EXPORT_SYMBOL(mlx5_lag_is_master);
1592
1593 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1594 {
1595 struct mlx5_lag *ldev;
1596 unsigned long flags;
1597 bool res;
1598
1599 spin_lock_irqsave(&lag_lock, flags);
1600 ldev = mlx5_lag_dev(dev);
1601 res = ldev && __mlx5_lag_is_sriov(ldev);
1602 spin_unlock_irqrestore(&lag_lock, flags);
1603
1604 return res;
1605 }
1606 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1607
1608 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1609 {
1610 struct mlx5_lag *ldev;
1611 unsigned long flags;
1612 bool res;
1613
1614 spin_lock_irqsave(&lag_lock, flags);
1615 ldev = mlx5_lag_dev(dev);
1616 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1617 spin_unlock_irqrestore(&lag_lock, flags);
1618
1619 return res;
1620 }
1621 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1622
1623 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1624 {
1625 struct mlx5_lag *ldev;
1626
1627 ldev = mlx5_lag_dev(dev);
1628 if (!ldev)
1629 return;
1630
1631 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1632 mutex_lock(&ldev->lock);
1633
1634 ldev->mode_changes_in_progress++;
1635 if (__mlx5_lag_is_active(ldev))
1636 mlx5_disable_lag(ldev);
1637
1638 mutex_unlock(&ldev->lock);
1639 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1640 }
1641
1642 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1643 {
1644 struct mlx5_lag *ldev;
1645
1646 ldev = mlx5_lag_dev(dev);
1647 if (!ldev)
1648 return;
1649
1650 mutex_lock(&ldev->lock);
1651 ldev->mode_changes_in_progress--;
1652 mutex_unlock(&ldev->lock);
1653 mlx5_queue_bond_work(ldev, 0);
1654 }
1655
1656 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1657 struct net_device *slave)
1658 {
1659 struct mlx5_lag *ldev;
1660 unsigned long flags;
1661 u8 port = 0;
1662 int i;
1663
1664 spin_lock_irqsave(&lag_lock, flags);
1665 ldev = mlx5_lag_dev(dev);
1666 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1667 goto unlock;
1668
1669 mlx5_ldev_for_each(i, 0, ldev) {
1670 if (ldev->pf[i].netdev == slave) {
1671 port = i;
1672 break;
1673 }
1674 }
1675
1676 port = ldev->v2p_map[port * ldev->buckets];
1677
1678 unlock:
1679 spin_unlock_irqrestore(&lag_lock, flags);
1680 return port;
1681 }
1682 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1683
1684 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1685 {
1686 struct mlx5_lag *ldev;
1687
1688 ldev = mlx5_lag_dev(dev);
1689 if (!ldev)
1690 return 0;
1691
1692 return ldev->ports;
1693 }
1694 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1695
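/* Return the next peer device after position *i, skipping the caller's
 * own device, and advance *i; NULL when there are no more peers.
 *
 * Typical usage (illustrative; handle_peer() is a placeholder):
 *
 *	int i = 0;
 *	struct mlx5_core_dev *peer;
 *
 *	while ((peer = mlx5_lag_get_next_peer_mdev(dev, &i)))
 *		handle_peer(peer);
 */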
1696 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
1697 {
1698 struct mlx5_core_dev *peer_dev = NULL;
1699 struct mlx5_lag *ldev;
1700 unsigned long flags;
1701 int idx;
1702
1703 spin_lock_irqsave(&lag_lock, flags);
1704 ldev = mlx5_lag_dev(dev);
1705 if (!ldev)
1706 goto unlock;
1707
1708 if (*i == MLX5_MAX_PORTS)
1709 goto unlock;
1710 mlx5_ldev_for_each(idx, *i, ldev)
1711 if (ldev->pf[idx].dev != dev)
1712 break;
1713
1714 if (idx == MLX5_MAX_PORTS) {
1715 *i = idx;
1716 goto unlock;
1717 }
1718 *i = idx + 1;
1719
1720 peer_dev = ldev->pf[idx].dev;
1721
1722 unlock:
1723 spin_unlock_irqrestore(&lag_lock, flags);
1724 return peer_dev;
1725 }
1726 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
1727
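/* Query congestion statistics from every port of an active LAG (or
 * only from this device when LAG is not active) and accumulate the
 * big-endian counters into @values at the given @offsets.
 */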
1728 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1729 u64 *values,
1730 int num_counters,
1731 size_t *offsets)
1732 {
1733 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1734 struct mlx5_core_dev **mdev;
1735 int ret = 0, i, j, idx = 0;
1736 struct mlx5_lag *ldev;
1737 unsigned long flags;
1738 int num_ports;
1739 void *out;
1740
1741 out = kvzalloc(outlen, GFP_KERNEL);
1742 if (!out)
1743 return -ENOMEM;
1744
1745 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1746 if (!mdev) {
1747 ret = -ENOMEM;
1748 goto free_out;
1749 }
1750
1751 memset(values, 0, sizeof(*values) * num_counters);
1752
1753 spin_lock_irqsave(&lag_lock, flags);
1754 ldev = mlx5_lag_dev(dev);
1755 if (ldev && __mlx5_lag_is_active(ldev)) {
1756 num_ports = ldev->ports;
1757 mlx5_ldev_for_each(i, 0, ldev)
1758 mdev[idx++] = ldev->pf[i].dev;
1759 } else {
1760 num_ports = 1;
1761 mdev[MLX5_LAG_P1] = dev;
1762 }
1763 spin_unlock_irqrestore(&lag_lock, flags);
1764
1765 for (i = 0; i < num_ports; ++i) {
1766 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1767
1768 MLX5_SET(query_cong_statistics_in, in, opcode,
1769 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1770 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1771 out);
1772 if (ret)
1773 goto free_mdev;
1774
1775 for (j = 0; j < num_counters; ++j)
1776 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1777 }
1778
1779 free_mdev:
1780 kvfree(mdev);
1781 free_out:
1782 kvfree(out);
1783 return ret;
1784 }
1785 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1786