1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/mlx5.h"
39 #include "lib/devcom.h"
40 #include "mlx5_core.h"
41 #include "eswitch.h"
42 #include "esw/acl/ofld.h"
43 #include "lag.h"
44 #include "mp.h"
45 #include "mpesw.h"
46
47
48 /* General purpose, use for short periods of time.
49 * Beware of lock dependencies (preferably, no locks should be acquired
50 * under it).
51 */
52 static DEFINE_SPINLOCK(lag_lock);
53
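/* Map the lag mode and mode flags to the firmware port selection mode:
 * hash-based lag uses the port selection flow table, MPESW has its own
 * dedicated mode, and everything else falls back to queue affinity.
 */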
54 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
55 {
56 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
57 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
58
59 if (mode == MLX5_LAG_MODE_MPESW)
60 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
61
62 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
63 }
64
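/* Build a bitmask of the ports currently used for TX (tx_enabled and
 * link up according to the bond tracker), for the lag context
 * active_port field.
 */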
65 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
66 {
67 u8 enabled_ports[MLX5_MAX_PORTS] = {};
68 u8 active_port = 0;
69 int num_enabled;
70 int idx;
71
72 mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
73 &num_enabled);
74 for (idx = 0; idx < num_enabled; idx++)
75 active_port |= BIT_MASK(enabled_ports[idx]);
76
77 return active_port;
78 }
79
80 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
81 int mode, unsigned long flags)
82 {
83 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
84 &flags);
85 int port_sel_mode = get_port_sel_mode(mode, flags);
86 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
87 u8 *ports = ldev->v2p_map;
88 int idx0, idx1;
89 void *lag_ctx;
90
91 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
92 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
93 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
94 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
95 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
96
97 if (idx0 < 0 || idx1 < 0)
98 return -EINVAL;
99
100 switch (port_sel_mode) {
101 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
102 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
103 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
104 break;
105 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
106 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
107 break;
108
109 MLX5_SET(lagc, lag_ctx, active_port,
110 lag_active_port_bits(mlx5_lag_dev(dev)));
111 break;
112 default:
113 break;
114 }
115 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
116
117 return mlx5_cmd_exec_in(dev, create_lag, in);
118 }
119
120 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
121 u8 *ports)
122 {
123 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
124 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
125 int idx0, idx1;
126
127 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
128 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
129 if (idx0 < 0 || idx1 < 0)
130 return -EINVAL;
131
132 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
133 MLX5_SET(modify_lag_in, in, field_select, 0x1);
134
135 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
136 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
137
138 return mlx5_cmd_exec_in(dev, modify_lag, in);
139 }
140
141 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
142 {
143 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
144
145 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
146
147 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
148 }
149 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
150
151 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
152 {
153 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
154
155 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
156
157 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
158 }
159 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
160
161 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
162 u8 *ports, int *num_disabled)
163 {
164 int i;
165
166 *num_disabled = 0;
167 mlx5_ldev_for_each(i, 0, ldev)
168 if (!tracker->netdev_state[i].tx_enabled ||
169 !tracker->netdev_state[i].link_up)
170 ports[(*num_disabled)++] = i;
171 }
172
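/* Fill @ports with the indices of the ports that are usable for TX.
 * If no port is both tx_enabled and link up, fall back to the list of
 * disabled ports so the resulting array is never empty.
 */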
173 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
174 u8 *ports, int *num_enabled)
175 {
176 int i;
177
178 *num_enabled = 0;
179 mlx5_ldev_for_each(i, 0, ldev)
180 if (tracker->netdev_state[i].tx_enabled &&
181 tracker->netdev_state[i].link_up)
182 ports[(*num_enabled)++] = i;
183
184 if (*num_enabled == 0)
185 mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
186 }
187
188 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
189 struct mlx5_lag *ldev,
190 struct lag_tracker *tracker,
191 unsigned long flags)
192 {
193 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
194 u8 enabled_ports[MLX5_MAX_PORTS] = {};
195 int written = 0;
196 int num_enabled;
197 int idx;
198 int err;
199 int i;
200 int j;
201
202 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
203 mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
204 &num_enabled);
205 for (i = 0; i < num_enabled; i++) {
206 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
207 if (err != 3)
208 return;
209 written += err;
210 }
211 buf[written - 2] = 0;
212 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
213 } else {
214 mlx5_ldev_for_each(i, 0, ldev) {
215 for (j = 0; j < ldev->buckets; j++) {
216 idx = i * ldev->buckets + j;
217 err = scnprintf(buf + written, 10,
218 " port %d:%d", i + 1, ldev->v2p_map[idx]);
219 if (err != 9)
220 return;
221 written += err;
222 }
223 }
224 mlx5_core_info(dev, "lag map:%s\n", buf);
225 }
226 }
227
228 static int mlx5_lag_netdev_event(struct notifier_block *this,
229 unsigned long event, void *ptr);
230 static void mlx5_do_bond_work(struct work_struct *work);
231
232 static void mlx5_ldev_free(struct kref *ref)
233 {
234 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
235 struct net *net;
236
237 if (ldev->nb.notifier_call) {
238 net = read_pnet(&ldev->net);
239 unregister_netdevice_notifier_net(net, &ldev->nb);
240 }
241
242 mlx5_lag_mp_cleanup(ldev);
243 cancel_delayed_work_sync(&ldev->bond_work);
244 destroy_workqueue(ldev->wq);
245 mutex_destroy(&ldev->lock);
246 kfree(ldev);
247 }
248
249 static void mlx5_ldev_put(struct mlx5_lag *ldev)
250 {
251 kref_put(&ldev->ref, mlx5_ldev_free);
252 }
253
254 static void mlx5_ldev_get(struct mlx5_lag *ldev)
255 {
256 kref_get(&ldev->ref);
257 }
258
259 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
260 {
261 struct mlx5_lag *ldev;
262 int err;
263
264 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
265 if (!ldev)
266 return NULL;
267
268 ldev->wq = create_singlethread_workqueue("mlx5_lag");
269 if (!ldev->wq) {
270 kfree(ldev);
271 return NULL;
272 }
273
274 kref_init(&ldev->ref);
275 mutex_init(&ldev->lock);
276 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
277
278 ldev->nb.notifier_call = mlx5_lag_netdev_event;
279 write_pnet(&ldev->net, mlx5_core_net(dev));
280 if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
281 ldev->nb.notifier_call = NULL;
282 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
283 }
284 ldev->mode = MLX5_LAG_MODE_NONE;
285
286 err = mlx5_lag_mp_init(ldev);
287 if (err)
288 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
289 err);
290
291 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
292 ldev->buckets = 1;
293
294 return ldev;
295 }
296
297 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
298 struct net_device *ndev)
299 {
300 int i;
301
302 mlx5_ldev_for_each(i, 0, ldev)
303 if (ldev->pf[i].netdev == ndev)
304 return i;
305
306 return -ENOENT;
307 }
308
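/* Return the PF array index of the @seq-th populated slot in @ldev,
 * or -ENOENT if there is no such device.
 */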
309 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
310 {
311 int i, num = 0;
312
313 if (!ldev)
314 return -ENOENT;
315
316 mlx5_ldev_for_each(i, 0, ldev) {
317 if (num == seq)
318 return i;
319 num++;
320 }
321 return -ENOENT;
322 }
323
324 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
325 {
326 int i, num = 0;
327
328 if (!ldev)
329 return 0;
330
331 mlx5_ldev_for_each(i, 0, ldev) {
332 (void)i;
333 num++;
334 }
335 return num;
336 }
337
338 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
339 {
340 int i, num = 0;
341
342 if (!ldev)
343 return 0;
344
345 mlx5_ldev_for_each(i, 0, ldev)
346 if (ldev->pf[i].netdev)
347 num++;
348 return num;
349 }
350
351 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
352 {
353 return ldev->mode == MLX5_LAG_MODE_ROCE;
354 }
355
356 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
357 {
358 return ldev->mode == MLX5_LAG_MODE_SRIOV;
359 }
360
361 /* Create a mapping between steering slots and active ports.
362 * As we have ldev->buckets slots per port, first assume that the native
363 * mapping should be used.
364 * If some ports are disabled, fill their slots with a mapping that
365 * points to active ports instead.
366 */
367 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
368 struct mlx5_lag *ldev,
369 u8 buckets,
370 u8 *ports)
371 {
372 int disabled[MLX5_MAX_PORTS] = {};
373 int enabled[MLX5_MAX_PORTS] = {};
374 int disabled_ports_num = 0;
375 int enabled_ports_num = 0;
376 int idx;
377 u32 rand;
378 int i;
379 int j;
380
381 mlx5_ldev_for_each(i, 0, ldev) {
382 if (tracker->netdev_state[i].tx_enabled &&
383 tracker->netdev_state[i].link_up)
384 enabled[enabled_ports_num++] = i;
385 else
386 disabled[disabled_ports_num++] = i;
387 }
388
389 /* Use native mapping by default where each port's buckets
390 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
391 */
392 mlx5_ldev_for_each(i, 0, ldev) {
393 for (j = 0; j < buckets; j++) {
394 idx = i * buckets + j;
395 ports[idx] = i + 1;
396 }
397 }
398
399 /* If all ports are disabled/enabled keep native mapping */
400 if (enabled_ports_num == ldev->ports ||
401 disabled_ports_num == ldev->ports)
402 return;
403
404 /* Go over the disabled ports and for each assign a random active port */
405 for (i = 0; i < disabled_ports_num; i++) {
406 for (j = 0; j < buckets; j++) {
407 get_random_bytes(&rand, 4);
408 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
409 }
410 }
411 }
412
413 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
414 {
415 int i;
416
417 mlx5_ldev_for_each(i, 0, ldev)
418 if (ldev->pf[i].has_drop)
419 return true;
420 return false;
421 }
422
423 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
424 {
425 int i;
426
427 mlx5_ldev_for_each(i, 0, ldev) {
428 if (!ldev->pf[i].has_drop)
429 continue;
430
431 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
432 MLX5_VPORT_UPLINK);
433 ldev->pf[i].has_drop = false;
434 }
435 }
436
437 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
438 struct lag_tracker *tracker)
439 {
440 u8 disabled_ports[MLX5_MAX_PORTS] = {};
441 struct mlx5_core_dev *dev;
442 int disabled_index;
443 int num_disabled;
444 int err;
445 int i;
446
447 /* First delete the current drop rule so there won't be any dropped
448 * packets
449 */
450 mlx5_lag_drop_rule_cleanup(ldev);
451
452 if (!ldev->tracker.has_inactive)
453 return;
454
455 mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
456
457 for (i = 0; i < num_disabled; i++) {
458 disabled_index = disabled_ports[i];
459 dev = ldev->pf[disabled_index].dev;
460 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
461 MLX5_VPORT_UPLINK);
462 if (!err)
463 ldev->pf[disabled_index].has_drop = true;
464 else
465 mlx5_core_err(dev,
466 "Failed to create lag drop rule, error: %d", err);
467 }
468 }
469
470 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
471 {
472 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
473 void *lag_ctx;
474
475 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
476
477 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
478 MLX5_SET(modify_lag_in, in, field_select, 0x2);
479
480 MLX5_SET(lagc, lag_ctx, active_port, ports);
481
482 return mlx5_cmd_exec_in(dev, modify_lag, in);
483 }
484
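/* Apply a new port mapping: in hash-based mode update the port
 * selection flow table (and, when the bypass capability is present,
 * the active_port bitmask); otherwise issue MODIFY_LAG with the new
 * tx_remap affinities.
 */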
485 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
486 {
487 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
488 struct mlx5_core_dev *dev0;
489 u8 active_ports;
490 int ret;
491
492 if (idx < 0)
493 return -EINVAL;
494
495 dev0 = ldev->pf[idx].dev;
496 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
497 ret = mlx5_lag_port_sel_modify(ldev, ports);
498 if (ret ||
499 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
500 return ret;
501
502 active_ports = lag_active_port_bits(ldev);
503
504 return mlx5_cmd_modify_active_port(dev0, active_ports);
505 }
506 return mlx5_cmd_modify_lag(dev0, ldev, ports);
507 }
508
509 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
510 {
511 struct net_device *ndev = NULL;
512 struct mlx5_lag *ldev;
513 unsigned long flags;
514 int i, last_idx;
515
516 spin_lock_irqsave(&lag_lock, flags);
517 ldev = mlx5_lag_dev(dev);
518
519 if (!ldev)
520 goto unlock;
521
522 mlx5_ldev_for_each(i, 0, ldev)
523 if (ldev->tracker.netdev_state[i].tx_enabled)
524 ndev = ldev->pf[i].netdev;
525 if (!ndev) {
526 last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
527 if (last_idx < 0)
528 goto unlock;
529 ndev = ldev->pf[last_idx].netdev;
530 }
531
532 dev_hold(ndev);
533
534 unlock:
535 spin_unlock_irqrestore(&lag_lock, flags);
536
537 return ndev;
538 }
539
540 void mlx5_modify_lag(struct mlx5_lag *ldev,
541 struct lag_tracker *tracker)
542 {
543 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
544 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
545 struct mlx5_core_dev *dev0;
546 int idx;
547 int err;
548 int i;
549 int j;
550
551 if (first_idx < 0)
552 return;
553
554 dev0 = ldev->pf[first_idx].dev;
555 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
556
557 mlx5_ldev_for_each(i, 0, ldev) {
558 for (j = 0; j < ldev->buckets; j++) {
559 idx = i * ldev->buckets + j;
560 if (ports[idx] == ldev->v2p_map[idx])
561 continue;
562 err = _mlx5_modify_lag(ldev, ports);
563 if (err) {
564 mlx5_core_err(dev0,
565 "Failed to modify LAG (%d)\n",
566 err);
567 return;
568 }
569 memcpy(ldev->v2p_map, ports, sizeof(ports));
570
571 mlx5_lag_print_mapping(dev0, ldev, tracker,
572 ldev->mode_flags);
573 break;
574 }
575 }
576
577 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
578 struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
579
580 if (ldev->mode != MLX5_LAG_MODE_ROCE)
581 mlx5_lag_drop_rule_setup(ldev, tracker);
582 /** Only sriov and roce lag should have tracker->tx_type set so
583 * no need to check the mode
584 */
585 blocking_notifier_call_chain(&dev0->priv.lag_nh,
586 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
587 ndev);
588 dev_put(ndev);
589 }
590 }
591
592 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
593 enum mlx5_lag_mode mode,
594 unsigned long *flags)
595 {
596 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
597 struct mlx5_core_dev *dev0;
598
599 if (first_idx < 0)
600 return -EINVAL;
601
602 if (mode == MLX5_LAG_MODE_MPESW ||
603 mode == MLX5_LAG_MODE_MULTIPATH)
604 return 0;
605
606 dev0 = ldev->pf[first_idx].dev;
607
608 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
609 if (ldev->ports > 2)
610 return -EINVAL;
611 return 0;
612 }
613
614 if (ldev->ports > 2)
615 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
616
617 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
618
619 return 0;
620 }
621
622 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
623 struct lag_tracker *tracker, bool shared_fdb,
624 unsigned long *flags)
625 {
626 *flags = 0;
627 if (shared_fdb) {
628 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
629 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
630 }
631
632 if (mode == MLX5_LAG_MODE_MPESW)
633 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
634
635 return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
636 }
637
638 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
639 {
640 int port_sel_mode = get_port_sel_mode(mode, flags);
641
642 switch (port_sel_mode) {
643 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
644 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
645 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
646 default: return "invalid";
647 }
648 }
649
650 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
651 {
652 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
653 struct mlx5_eswitch *master_esw;
654 struct mlx5_core_dev *dev0;
655 int i, j;
656 int err;
657
658 if (first_idx < 0)
659 return -EINVAL;
660
661 dev0 = ldev->pf[first_idx].dev;
662 master_esw = dev0->priv.eswitch;
663 mlx5_ldev_for_each(i, first_idx + 1, ldev) {
664 struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
665
666 err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
667 slave_esw, ldev->ports);
668 if (err)
669 goto err;
670 }
671 return 0;
672 err:
673 mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
674 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
675 ldev->pf[j].dev->priv.eswitch);
676 return err;
677 }
678
679 static int mlx5_create_lag(struct mlx5_lag *ldev,
680 struct lag_tracker *tracker,
681 enum mlx5_lag_mode mode,
682 unsigned long flags)
683 {
684 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
685 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
686 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
687 struct mlx5_core_dev *dev0;
688 int err;
689
690 if (first_idx < 0)
691 return -EINVAL;
692
693 dev0 = ldev->pf[first_idx].dev;
694 if (tracker)
695 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
696 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
697 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
698
699 err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
700 if (err) {
701 mlx5_core_err(dev0,
702 "Failed to create LAG (%d)\n",
703 err);
704 return err;
705 }
706
707 if (shared_fdb) {
708 err = mlx5_lag_create_single_fdb(ldev);
709 if (err)
710 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
711 else
712 mlx5_core_info(dev0, "Operation mode is single FDB\n");
713 }
714
715 if (err) {
716 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
717 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
718 mlx5_core_err(dev0,
719 "Failed to deactivate RoCE LAG; driver restart required\n");
720 }
721 BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
722
723 return err;
724 }
725
726 int mlx5_activate_lag(struct mlx5_lag *ldev,
727 struct lag_tracker *tracker,
728 enum mlx5_lag_mode mode,
729 bool shared_fdb)
730 {
731 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
732 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
733 struct mlx5_core_dev *dev0;
734 unsigned long flags = 0;
735 int err;
736
737 if (first_idx < 0)
738 return -EINVAL;
739
740 dev0 = ldev->pf[first_idx].dev;
741 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
742 if (err)
743 return err;
744
745 if (mode != MLX5_LAG_MODE_MPESW) {
746 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
747 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
748 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
749 ldev->v2p_map);
750 if (err) {
751 mlx5_core_err(dev0,
752 "Failed to create LAG port selection(%d)\n",
753 err);
754 return err;
755 }
756 }
757 }
758
759 err = mlx5_create_lag(ldev, tracker, mode, flags);
760 if (err) {
761 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
762 mlx5_lag_port_sel_destroy(ldev);
763 if (roce_lag)
764 mlx5_core_err(dev0,
765 "Failed to activate RoCE LAG\n");
766 else
767 mlx5_core_err(dev0,
768 "Failed to activate VF LAG\n"
769 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
770 return err;
771 }
772
773 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
774 !roce_lag)
775 mlx5_lag_drop_rule_setup(ldev, tracker);
776
777 ldev->mode = mode;
778 ldev->mode_flags = flags;
779 return 0;
780 }
781
782 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
783 {
784 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
785 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
786 bool roce_lag = __mlx5_lag_is_roce(ldev);
787 unsigned long flags = ldev->mode_flags;
788 struct mlx5_eswitch *master_esw;
789 struct mlx5_core_dev *dev0;
790 int err;
791 int i;
792
793 if (first_idx < 0)
794 return -EINVAL;
795
796 dev0 = ldev->pf[first_idx].dev;
797 master_esw = dev0->priv.eswitch;
798 ldev->mode = MLX5_LAG_MODE_NONE;
799 ldev->mode_flags = 0;
800 mlx5_lag_mp_reset(ldev);
801
802 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
803 mlx5_ldev_for_each(i, first_idx + 1, ldev)
804 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
805 ldev->pf[i].dev->priv.eswitch);
806 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
807 }
808
809 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
810 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
811 if (err) {
812 if (roce_lag) {
813 mlx5_core_err(dev0,
814 "Failed to deactivate RoCE LAG; driver restart required\n");
815 } else {
816 mlx5_core_err(dev0,
817 "Failed to deactivate VF LAG; driver restart required\n"
818 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
819 }
820 return err;
821 }
822
823 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
824 mlx5_lag_port_sel_destroy(ldev);
825 ldev->buckets = 1;
826 }
827 if (mlx5_lag_has_drop_rule(ldev))
828 mlx5_lag_drop_rule_cleanup(ldev);
829
830 return 0;
831 }
832
833 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
834 {
835 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
836 #ifdef CONFIG_MLX5_ESWITCH
837 struct mlx5_core_dev *dev;
838 u8 mode;
839 #endif
840 bool roce_support;
841 int i;
842
843 if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
844 return false;
845
846 #ifdef CONFIG_MLX5_ESWITCH
847 mlx5_ldev_for_each(i, 0, ldev) {
848 dev = ldev->pf[i].dev;
849 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
850 return false;
851 }
852
853 dev = ldev->pf[first_idx].dev;
854 mode = mlx5_eswitch_mode(dev);
855 mlx5_ldev_for_each(i, 0, ldev)
856 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
857 return false;
858
859 #else
860 mlx5_ldev_for_each(i, 0, ldev)
861 if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
862 return false;
863 #endif
864 roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
865 mlx5_ldev_for_each(i, first_idx + 1, ldev)
866 if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
867 return false;
868
869 return true;
870 }
871
872 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
873 {
874 int i;
875
876 mlx5_ldev_for_each(i, 0, ldev) {
877 if (ldev->pf[i].dev->priv.flags &
878 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
879 continue;
880
881 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
882 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
883 }
884 }
885
886 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
887 {
888 int i;
889
890 mlx5_ldev_for_each(i, 0, ldev) {
891 if (ldev->pf[i].dev->priv.flags &
892 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
893 continue;
894
895 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
896 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
897 }
898 }
899
900 void mlx5_disable_lag(struct mlx5_lag *ldev)
901 {
902 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
903 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
904 struct mlx5_core_dev *dev0;
905 bool roce_lag;
906 int err;
907 int i;
908
909 if (idx < 0)
910 return;
911
912 dev0 = ldev->pf[idx].dev;
913 roce_lag = __mlx5_lag_is_roce(ldev);
914
915 if (shared_fdb) {
916 mlx5_lag_remove_devices(ldev);
917 } else if (roce_lag) {
918 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
919 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
920 mlx5_rescan_drivers_locked(dev0);
921 }
922 mlx5_ldev_for_each(i, idx + 1, ldev)
923 mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
924 }
925
926 err = mlx5_deactivate_lag(ldev);
927 if (err)
928 return;
929
930 if (shared_fdb || roce_lag)
931 mlx5_lag_add_devices(ldev);
932
933 if (shared_fdb)
934 mlx5_ldev_for_each(i, 0, ldev)
935 if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
936 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
937 }
938
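/* Shared FDB is supported only when every device in the lag is in
 * switchdev mode with vport match metadata enabled, the required
 * firmware capabilities are present, and all peer eswitches are
 * reachable through devcom.
 */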
939 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
940 {
941 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
942 struct mlx5_core_dev *dev;
943 int i;
944
945 if (idx < 0)
946 return false;
947
948 mlx5_ldev_for_each(i, idx + 1, ldev) {
949 dev = ldev->pf[i].dev;
950 if (is_mdev_switchdev_mode(dev) &&
951 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
952 MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
953 MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
954 mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
955 MLX5_CAP_GEN(dev, num_lag_ports) - 1)
956 continue;
957 return false;
958 }
959
960 dev = ldev->pf[idx].dev;
961 if (is_mdev_switchdev_mode(dev) &&
962 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
963 mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
964 MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
965 mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
966 return true;
967
968 return false;
969 }
970
971 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
972 {
973 bool roce_lag = true;
974 int i;
975
976 mlx5_ldev_for_each(i, 0, ldev)
977 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
978
979 #ifdef CONFIG_MLX5_ESWITCH
980 mlx5_ldev_for_each(i, 0, ldev)
981 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
982 #endif
983
984 return roce_lag;
985 }
986
987 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
988 {
989 return do_bond && __mlx5_lag_is_active(ldev) &&
990 ldev->mode != MLX5_LAG_MODE_MPESW;
991 }
992
993 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
994 {
995 return !do_bond && __mlx5_lag_is_active(ldev) &&
996 ldev->mode != MLX5_LAG_MODE_MPESW;
997 }
998
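/* Reconcile the hardware lag with the bond state reported by the
 * tracker: activate RoCE or SRIOV lag (optionally with a shared FDB),
 * update the port mapping of an already active lag, or tear the lag
 * down when the bond no longer qualifies.
 */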
999 static void mlx5_do_bond(struct mlx5_lag *ldev)
1000 {
1001 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1002 struct lag_tracker tracker = { };
1003 struct mlx5_core_dev *dev0;
1004 struct net_device *ndev;
1005 bool do_bond, roce_lag;
1006 int err;
1007 int i;
1008
1009 if (idx < 0)
1010 return;
1011
1012 dev0 = ldev->pf[idx].dev;
1013 if (!mlx5_lag_is_ready(ldev)) {
1014 do_bond = false;
1015 } else {
1016 /* VF LAG is in multipath mode, ignore bond change requests */
1017 if (mlx5_lag_is_multipath(dev0))
1018 return;
1019
1020 tracker = ldev->tracker;
1021
1022 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1023 }
1024
1025 if (do_bond && !__mlx5_lag_is_active(ldev)) {
1026 bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1027
1028 roce_lag = mlx5_lag_is_roce_lag(ldev);
1029
1030 if (shared_fdb || roce_lag)
1031 mlx5_lag_remove_devices(ldev);
1032
1033 err = mlx5_activate_lag(ldev, &tracker,
1034 roce_lag ? MLX5_LAG_MODE_ROCE :
1035 MLX5_LAG_MODE_SRIOV,
1036 shared_fdb);
1037 if (err) {
1038 if (shared_fdb || roce_lag)
1039 mlx5_lag_add_devices(ldev);
1040 if (shared_fdb) {
1041 mlx5_ldev_for_each(i, 0, ldev)
1042 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1043 }
1044
1045 return;
1046 } else if (roce_lag) {
1047 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1048 mlx5_rescan_drivers_locked(dev0);
1049 mlx5_ldev_for_each(i, idx + 1, ldev) {
1050 if (mlx5_get_roce_state(ldev->pf[i].dev))
1051 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
1052 }
1053 } else if (shared_fdb) {
1054 int i;
1055
1056 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1057 mlx5_rescan_drivers_locked(dev0);
1058
1059 mlx5_ldev_for_each(i, 0, ldev) {
1060 err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1061 if (err)
1062 break;
1063 }
1064
1065 if (err) {
1066 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1067 mlx5_rescan_drivers_locked(dev0);
1068 mlx5_deactivate_lag(ldev);
1069 mlx5_lag_add_devices(ldev);
1070 mlx5_ldev_for_each(i, 0, ldev)
1071 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1072 mlx5_core_err(dev0, "Failed to enable lag\n");
1073 return;
1074 }
1075 }
1076 if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1077 ndev = mlx5_lag_active_backup_get_netdev(dev0);
1078 /** Only sriov and roce lag should have tracker->tx_type
1079 * set so no need to check the mode
1080 */
1081 blocking_notifier_call_chain(&dev0->priv.lag_nh,
1082 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1083 ndev);
1084 dev_put(ndev);
1085 }
1086 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1087 mlx5_modify_lag(ldev, &tracker);
1088 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1089 mlx5_disable_lag(ldev);
1090 }
1091 }
1092
1093 /* The last mdev to unregister will destroy the workqueue before removing the
1094 * devcom component, and as all the mdevs use the same devcom component we are
1095 * guaranteed that the devcom is valid while the calling work is running.
1096 */
1097 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1098 {
1099 struct mlx5_devcom_comp_dev *devcom = NULL;
1100 int i;
1101
1102 mutex_lock(&ldev->lock);
1103 i = mlx5_get_next_ldev_func(ldev, 0);
1104 if (i < MLX5_MAX_PORTS)
1105 devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
1106 mutex_unlock(&ldev->lock);
1107 return devcom;
1108 }
1109
1110 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1111 {
1112 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1113 }
1114
1115 static void mlx5_do_bond_work(struct work_struct *work)
1116 {
1117 struct delayed_work *delayed_work = to_delayed_work(work);
1118 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1119 bond_work);
1120 struct mlx5_devcom_comp_dev *devcom;
1121 int status;
1122
1123 devcom = mlx5_lag_get_devcom_comp(ldev);
1124 if (!devcom)
1125 return;
1126
1127 status = mlx5_devcom_comp_trylock(devcom);
1128 if (!status) {
1129 mlx5_queue_bond_work(ldev, HZ);
1130 return;
1131 }
1132
1133 mutex_lock(&ldev->lock);
1134 if (ldev->mode_changes_in_progress) {
1135 mutex_unlock(&ldev->lock);
1136 mlx5_devcom_comp_unlock(devcom);
1137 mlx5_queue_bond_work(ldev, HZ);
1138 return;
1139 }
1140
1141 mlx5_do_bond(ldev);
1142 mutex_unlock(&ldev->lock);
1143 mlx5_devcom_comp_unlock(devcom);
1144 }
1145
1146 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1147 struct lag_tracker *tracker,
1148 struct netdev_notifier_changeupper_info *info)
1149 {
1150 struct net_device *upper = info->upper_dev, *ndev_tmp;
1151 struct netdev_lag_upper_info *lag_upper_info = NULL;
1152 bool is_bonded, is_in_lag, mode_supported;
1153 bool has_inactive = 0;
1154 struct slave *slave;
1155 u8 bond_status = 0;
1156 int num_slaves = 0;
1157 int changed = 0;
1158 int i, idx = -1;
1159
1160 if (!netif_is_lag_master(upper))
1161 return 0;
1162
1163 if (info->linking)
1164 lag_upper_info = info->upper_info;
1165
1166 /* The event may still be of interest if the slave does not belong to
1167 * us, but is enslaved to a master which has one or more of our netdevs
1168 * as slaves (e.g., if a new slave is added to a master that bonds two
1169 * of our netdevs, we should unbond).
1170 */
1171 rcu_read_lock();
1172 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1173 mlx5_ldev_for_each(i, 0, ldev) {
1174 if (ldev->pf[i].netdev == ndev_tmp) {
1175 idx++;
1176 break;
1177 }
1178 }
1179 if (i < MLX5_MAX_PORTS) {
1180 slave = bond_slave_get_rcu(ndev_tmp);
1181 if (slave)
1182 has_inactive |= bond_is_slave_inactive(slave);
1183 bond_status |= (1 << idx);
1184 }
1185
1186 num_slaves++;
1187 }
1188 rcu_read_unlock();
1189
1190 /* None of this lagdev's netdevs are slaves of this master. */
1191 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1192 return 0;
1193
1194 if (lag_upper_info) {
1195 tracker->tx_type = lag_upper_info->tx_type;
1196 tracker->hash_type = lag_upper_info->hash_type;
1197 }
1198
1199 tracker->has_inactive = has_inactive;
1200 /* Determine bonding status:
1201 * A device is considered bonded if all of its physical ports are slaves
1202 * of the same lag master, and no other ports are enslaved to it.
1203 */
1204 is_in_lag = num_slaves == ldev->ports &&
1205 bond_status == GENMASK(ldev->ports - 1, 0);
1206
1207 /* Lag mode must be activebackup or hash. */
1208 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1209 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1210
1211 is_bonded = is_in_lag && mode_supported;
1212 if (tracker->is_bonded != is_bonded) {
1213 tracker->is_bonded = is_bonded;
1214 changed = 1;
1215 }
1216
1217 if (!is_in_lag)
1218 return changed;
1219
1220 if (!mlx5_lag_is_ready(ldev))
1221 NL_SET_ERR_MSG_MOD(info->info.extack,
1222 "Can't activate LAG offload, PF is configured with more than 64 VFs");
1223 else if (!mode_supported)
1224 NL_SET_ERR_MSG_MOD(info->info.extack,
1225 "Can't activate LAG offload, TX type isn't supported");
1226
1227 return changed;
1228 }
1229
1230 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1231 struct lag_tracker *tracker,
1232 struct net_device *ndev,
1233 struct netdev_notifier_changelowerstate_info *info)
1234 {
1235 struct netdev_lag_lower_state_info *lag_lower_info;
1236 int idx;
1237
1238 if (!netif_is_lag_port(ndev))
1239 return 0;
1240
1241 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1242 if (idx < 0)
1243 return 0;
1244
1245 /* This information is used to determine virtual to physical
1246 * port mapping.
1247 */
1248 lag_lower_info = info->lower_state_info;
1249 if (!lag_lower_info)
1250 return 0;
1251
1252 tracker->netdev_state[idx] = *lag_lower_info;
1253
1254 return 1;
1255 }
1256
1257 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1258 struct lag_tracker *tracker,
1259 struct net_device *ndev)
1260 {
1261 struct net_device *ndev_tmp;
1262 struct slave *slave;
1263 bool has_inactive = 0;
1264 int idx;
1265
1266 if (!netif_is_lag_master(ndev))
1267 return 0;
1268
1269 rcu_read_lock();
1270 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1271 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1272 if (idx < 0)
1273 continue;
1274
1275 slave = bond_slave_get_rcu(ndev_tmp);
1276 if (slave)
1277 has_inactive |= bond_is_slave_inactive(slave);
1278 }
1279 rcu_read_unlock();
1280
1281 if (tracker->has_inactive == has_inactive)
1282 return 0;
1283
1284 tracker->has_inactive = has_inactive;
1285
1286 return 1;
1287 }
1288
1289 /* this handler is always registered to netdev events */
1290 static int mlx5_lag_netdev_event(struct notifier_block *this,
1291 unsigned long event, void *ptr)
1292 {
1293 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1294 struct lag_tracker tracker;
1295 struct mlx5_lag *ldev;
1296 int changed = 0;
1297
1298 if (event != NETDEV_CHANGEUPPER &&
1299 event != NETDEV_CHANGELOWERSTATE &&
1300 event != NETDEV_CHANGEINFODATA)
1301 return NOTIFY_DONE;
1302
1303 ldev = container_of(this, struct mlx5_lag, nb);
1304
1305 tracker = ldev->tracker;
1306
1307 switch (event) {
1308 case NETDEV_CHANGEUPPER:
1309 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1310 break;
1311 case NETDEV_CHANGELOWERSTATE:
1312 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1313 ndev, ptr);
1314 break;
1315 case NETDEV_CHANGEINFODATA:
1316 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1317 break;
1318 }
1319
1320 ldev->tracker = tracker;
1321
1322 if (changed)
1323 mlx5_queue_bond_work(ldev, 0);
1324
1325 return NOTIFY_DONE;
1326 }
1327
1328 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1329 struct mlx5_core_dev *dev,
1330 struct net_device *netdev)
1331 {
1332 unsigned int fn = mlx5_get_dev_index(dev);
1333 unsigned long flags;
1334
1335 spin_lock_irqsave(&lag_lock, flags);
1336 ldev->pf[fn].netdev = netdev;
1337 ldev->tracker.netdev_state[fn].link_up = 0;
1338 ldev->tracker.netdev_state[fn].tx_enabled = 0;
1339 spin_unlock_irqrestore(&lag_lock, flags);
1340 }
1341
1342 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1343 struct net_device *netdev)
1344 {
1345 unsigned long flags;
1346 int i;
1347
1348 spin_lock_irqsave(&lag_lock, flags);
1349 mlx5_ldev_for_each(i, 0, ldev) {
1350 if (ldev->pf[i].netdev == netdev) {
1351 ldev->pf[i].netdev = NULL;
1352 break;
1353 }
1354 }
1355 spin_unlock_irqrestore(&lag_lock, flags);
1356 }
1357
1358 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1359 struct mlx5_core_dev *dev)
1360 {
1361 unsigned int fn = mlx5_get_dev_index(dev);
1362
1363 ldev->pf[fn].dev = dev;
1364 dev->priv.lag = ldev;
1365 }
1366
1367 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1368 struct mlx5_core_dev *dev)
1369 {
1370 int fn;
1371
1372 fn = mlx5_get_dev_index(dev);
1373 if (ldev->pf[fn].dev != dev)
1374 return;
1375
1376 ldev->pf[fn].dev = NULL;
1377 dev->priv.lag = NULL;
1378 }
1379
1380 /* Must be called with HCA devcom component lock held */
1381 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1382 {
1383 struct mlx5_devcom_comp_dev *pos = NULL;
1384 struct mlx5_lag *ldev = NULL;
1385 struct mlx5_core_dev *tmp_dev;
1386
1387 tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
1388 if (tmp_dev)
1389 ldev = mlx5_lag_dev(tmp_dev);
1390
1391 if (!ldev) {
1392 ldev = mlx5_lag_dev_alloc(dev);
1393 if (!ldev) {
1394 mlx5_core_err(dev, "Failed to alloc lag dev\n");
1395 return 0;
1396 }
1397 mlx5_ldev_add_mdev(ldev, dev);
1398 return 0;
1399 }
1400
1401 mutex_lock(&ldev->lock);
1402 if (ldev->mode_changes_in_progress) {
1403 mutex_unlock(&ldev->lock);
1404 return -EAGAIN;
1405 }
1406 mlx5_ldev_get(ldev);
1407 mlx5_ldev_add_mdev(ldev, dev);
1408 mutex_unlock(&ldev->lock);
1409
1410 return 0;
1411 }
1412
1413 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
1414 {
1415 mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
1416 }
1417
1418 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
1419 {
1420 struct mlx5_devcom_match_attr attr = {
1421 .key.val = mlx5_query_nic_system_image_guid(dev),
1422 .flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
1423 .net = mlx5_core_net(dev),
1424 };
1425
1426 /* This component is used to sync adding core_dev to lag_dev and to sync
1427 * changes of mlx5_adev_devices between LAG layer and other layers.
1428 */
1429 dev->priv.hca_devcom_comp =
1430 mlx5_devcom_register_component(dev->priv.devc,
1431 MLX5_DEVCOM_HCA_PORTS,
1432 &attr, NULL, dev);
1433 if (!dev->priv.hca_devcom_comp) {
1434 mlx5_core_err(dev,
1435 "Failed to register devcom HCA component.");
1436 return -EINVAL;
1437 }
1438
1439 return 0;
1440 }
1441
1442 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1443 {
1444 struct mlx5_lag *ldev;
1445
1446 ldev = mlx5_lag_dev(dev);
1447 if (!ldev)
1448 return;
1449
1450 /* mdev is being removed, might as well remove debugfs
1451 * as early as possible.
1452 */
1453 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1454 recheck:
1455 mutex_lock(&ldev->lock);
1456 if (ldev->mode_changes_in_progress) {
1457 mutex_unlock(&ldev->lock);
1458 msleep(100);
1459 goto recheck;
1460 }
1461 mlx5_ldev_remove_mdev(ldev, dev);
1462 mutex_unlock(&ldev->lock);
1463 mlx5_lag_unregister_hca_devcom_comp(dev);
1464 mlx5_ldev_put(ldev);
1465 }
1466
1467 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1468 {
1469 int err;
1470
1471 if (!mlx5_lag_is_supported(dev))
1472 return;
1473
1474 if (mlx5_lag_register_hca_devcom_comp(dev))
1475 return;
1476
1477 recheck:
1478 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1479 err = __mlx5_lag_dev_add_mdev(dev);
1480 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1481
1482 if (err) {
1483 msleep(100);
1484 goto recheck;
1485 }
1486 mlx5_ldev_add_debugfs(dev);
1487 }
1488
1489 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1490 struct net_device *netdev)
1491 {
1492 struct mlx5_lag *ldev;
1493 bool lag_is_active;
1494
1495 ldev = mlx5_lag_dev(dev);
1496 if (!ldev)
1497 return;
1498
1499 mutex_lock(&ldev->lock);
1500 mlx5_ldev_remove_netdev(ldev, netdev);
1501 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1502
1503 lag_is_active = __mlx5_lag_is_active(ldev);
1504 mutex_unlock(&ldev->lock);
1505
1506 if (lag_is_active)
1507 mlx5_queue_bond_work(ldev, 0);
1508 }
1509
1510 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1511 struct net_device *netdev)
1512 {
1513 struct mlx5_lag *ldev;
1514 int num = 0;
1515
1516 ldev = mlx5_lag_dev(dev);
1517 if (!ldev)
1518 return;
1519
1520 mutex_lock(&ldev->lock);
1521 mlx5_ldev_add_netdev(ldev, dev, netdev);
1522 num = mlx5_lag_num_netdevs(ldev);
1523 if (num >= ldev->ports)
1524 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1525 mutex_unlock(&ldev->lock);
1526 mlx5_queue_bond_work(ldev, 0);
1527 }
1528
1529 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
1530 {
1531 int i;
1532
1533 for (i = start_idx; i >= end_idx; i--)
1534 if (ldev->pf[i].dev)
1535 return i;
1536 return -1;
1537 }
1538
1539 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
1540 {
1541 int i;
1542
1543 for (i = start_idx; i < MLX5_MAX_PORTS; i++)
1544 if (ldev->pf[i].dev)
1545 return i;
1546 return MLX5_MAX_PORTS;
1547 }
1548
1549 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1550 {
1551 struct mlx5_lag *ldev;
1552 unsigned long flags;
1553 bool res;
1554
1555 spin_lock_irqsave(&lag_lock, flags);
1556 ldev = mlx5_lag_dev(dev);
1557 res = ldev && __mlx5_lag_is_roce(ldev);
1558 spin_unlock_irqrestore(&lag_lock, flags);
1559
1560 return res;
1561 }
1562 EXPORT_SYMBOL(mlx5_lag_is_roce);
1563
1564 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1565 {
1566 struct mlx5_lag *ldev;
1567 unsigned long flags;
1568 bool res;
1569
1570 spin_lock_irqsave(&lag_lock, flags);
1571 ldev = mlx5_lag_dev(dev);
1572 res = ldev && __mlx5_lag_is_active(ldev);
1573 spin_unlock_irqrestore(&lag_lock, flags);
1574
1575 return res;
1576 }
1577 EXPORT_SYMBOL(mlx5_lag_is_active);
1578
1579 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1580 {
1581 struct mlx5_lag *ldev;
1582 unsigned long flags;
1583 bool res = 0;
1584
1585 spin_lock_irqsave(&lag_lock, flags);
1586 ldev = mlx5_lag_dev(dev);
1587 if (ldev)
1588 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1589 spin_unlock_irqrestore(&lag_lock, flags);
1590
1591 return res;
1592 }
1593 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1594
1595 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1596 {
1597 struct mlx5_lag *ldev;
1598 unsigned long flags;
1599 bool res = false;
1600 int idx;
1601
1602 spin_lock_irqsave(&lag_lock, flags);
1603 ldev = mlx5_lag_dev(dev);
1604 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1605 res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
1606 spin_unlock_irqrestore(&lag_lock, flags);
1607
1608 return res;
1609 }
1610 EXPORT_SYMBOL(mlx5_lag_is_master);
1611
1612 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1613 {
1614 struct mlx5_lag *ldev;
1615 unsigned long flags;
1616 bool res;
1617
1618 spin_lock_irqsave(&lag_lock, flags);
1619 ldev = mlx5_lag_dev(dev);
1620 res = ldev && __mlx5_lag_is_sriov(ldev);
1621 spin_unlock_irqrestore(&lag_lock, flags);
1622
1623 return res;
1624 }
1625 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1626
1627 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1628 {
1629 struct mlx5_lag *ldev;
1630 unsigned long flags;
1631 bool res;
1632
1633 spin_lock_irqsave(&lag_lock, flags);
1634 ldev = mlx5_lag_dev(dev);
1635 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1636 spin_unlock_irqrestore(&lag_lock, flags);
1637
1638 return res;
1639 }
1640 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1641
1642 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1643 {
1644 struct mlx5_lag *ldev;
1645
1646 ldev = mlx5_lag_dev(dev);
1647 if (!ldev)
1648 return;
1649
1650 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1651 mutex_lock(&ldev->lock);
1652
1653 ldev->mode_changes_in_progress++;
1654 if (__mlx5_lag_is_active(ldev))
1655 mlx5_disable_lag(ldev);
1656
1657 mutex_unlock(&ldev->lock);
1658 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1659 }
1660
1661 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1662 {
1663 struct mlx5_lag *ldev;
1664
1665 ldev = mlx5_lag_dev(dev);
1666 if (!ldev)
1667 return;
1668
1669 mutex_lock(&ldev->lock);
1670 ldev->mode_changes_in_progress--;
1671 mutex_unlock(&ldev->lock);
1672 mlx5_queue_bond_work(ldev, 0);
1673 }
1674
1675 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1676 struct net_device *slave)
1677 {
1678 struct mlx5_lag *ldev;
1679 unsigned long flags;
1680 u8 port = 0;
1681 int i;
1682
1683 spin_lock_irqsave(&lag_lock, flags);
1684 ldev = mlx5_lag_dev(dev);
1685 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1686 goto unlock;
1687
1688 mlx5_ldev_for_each(i, 0, ldev) {
1689 if (ldev->pf[i].netdev == slave) {
1690 port = i;
1691 break;
1692 }
1693 }
1694
1695 port = ldev->v2p_map[port * ldev->buckets];
1696
1697 unlock:
1698 spin_unlock_irqrestore(&lag_lock, flags);
1699 return port;
1700 }
1701 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1702
1703 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1704 {
1705 struct mlx5_lag *ldev;
1706
1707 ldev = mlx5_lag_dev(dev);
1708 if (!ldev)
1709 return 0;
1710
1711 return ldev->ports;
1712 }
1713 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1714
1715 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
1716 {
1717 struct mlx5_core_dev *peer_dev = NULL;
1718 struct mlx5_lag *ldev;
1719 unsigned long flags;
1720 int idx;
1721
1722 spin_lock_irqsave(&lag_lock, flags);
1723 ldev = mlx5_lag_dev(dev);
1724 if (!ldev)
1725 goto unlock;
1726
1727 if (*i == MLX5_MAX_PORTS)
1728 goto unlock;
1729 mlx5_ldev_for_each(idx, *i, ldev)
1730 if (ldev->pf[idx].dev != dev)
1731 break;
1732
1733 if (idx == MLX5_MAX_PORTS) {
1734 *i = idx;
1735 goto unlock;
1736 }
1737 *i = idx + 1;
1738
1739 peer_dev = ldev->pf[idx].dev;
1740
1741 unlock:
1742 spin_unlock_irqrestore(&lag_lock, flags);
1743 return peer_dev;
1744 }
1745 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
1746
1747 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1748 u64 *values,
1749 int num_counters,
1750 size_t *offsets)
1751 {
1752 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1753 struct mlx5_core_dev **mdev;
1754 int ret = 0, i, j, idx = 0;
1755 struct mlx5_lag *ldev;
1756 unsigned long flags;
1757 int num_ports;
1758 void *out;
1759
1760 out = kvzalloc(outlen, GFP_KERNEL);
1761 if (!out)
1762 return -ENOMEM;
1763
1764 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1765 if (!mdev) {
1766 ret = -ENOMEM;
1767 goto free_out;
1768 }
1769
1770 memset(values, 0, sizeof(*values) * num_counters);
1771
1772 spin_lock_irqsave(&lag_lock, flags);
1773 ldev = mlx5_lag_dev(dev);
1774 if (ldev && __mlx5_lag_is_active(ldev)) {
1775 num_ports = ldev->ports;
1776 mlx5_ldev_for_each(i, 0, ldev)
1777 mdev[idx++] = ldev->pf[i].dev;
1778 } else {
1779 num_ports = 1;
1780 mdev[MLX5_LAG_P1] = dev;
1781 }
1782 spin_unlock_irqrestore(&lag_lock, flags);
1783
1784 for (i = 0; i < num_ports; ++i) {
1785 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1786
1787 MLX5_SET(query_cong_statistics_in, in, opcode,
1788 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1789 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1790 out);
1791 if (ret)
1792 goto free_mdev;
1793
1794 for (j = 0; j < num_counters; ++j)
1795 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1796 }
1797
1798 free_mdev:
1799 kvfree(mdev);
1800 free_out:
1801 kvfree(out);
1802 return ret;
1803 }
1804 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1805