1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <linux/netdevice.h>
34 #include <net/bonding.h>
35 #include <linux/mlx5/driver.h>
36 #include <linux/mlx5/eswitch.h>
37 #include <linux/mlx5/vport.h>
38 #include "lib/mlx5.h"
39 #include "lib/devcom.h"
40 #include "mlx5_core.h"
41 #include "eswitch.h"
42 #include "esw/acl/ofld.h"
43 #include "lag.h"
44 #include "mp.h"
45 #include "mpesw.h"
46
47
48 /* General purpose, use for short periods of time.
49 * Beware of lock dependencies (preferably, no locks should be acquired
50 * under it).
51 */
52 static DEFINE_SPINLOCK(lag_lock);
53
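/* Translate the LAG mode and flags into the firmware port selection mode:
 * the hash-based flag selects the port-select flow table, MPESW selects
 * its dedicated mode, and everything else uses queue affinity.
 */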
54 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
55 {
56 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
57 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT;
58
59 if (mode == MLX5_LAG_MODE_MPESW)
60 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW;
61
62 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY;
63 }
64
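/* Build a bitmask of the ports that currently carry TX traffic,
 * according to the bond state tracked in ldev->tracker.
 */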
65 static u8 lag_active_port_bits(struct mlx5_lag *ldev)
66 {
67 u8 enabled_ports[MLX5_MAX_PORTS] = {};
68 u8 active_port = 0;
69 int num_enabled;
70 int idx;
71
72 mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports,
73 &num_enabled);
74 for (idx = 0; idx < num_enabled; idx++)
75 active_port |= BIT_MASK(enabled_ports[idx]);
76
77 return active_port;
78 }
79
80 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
81 int mode, unsigned long flags)
82 {
83 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
84 &flags);
85 int port_sel_mode = get_port_sel_mode(mode, flags);
86 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
87 u8 *ports = ldev->v2p_map;
88 int idx0, idx1;
89 void *lag_ctx;
90
91 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
92 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
93 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
94 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
95 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
96
97 if (idx0 < 0 || idx1 < 0)
98 return -EINVAL;
99
100 switch (port_sel_mode) {
101 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY:
102 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
103 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
104 break;
105 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT:
106 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass))
107 break;
108
109 MLX5_SET(lagc, lag_ctx, active_port,
110 lag_active_port_bits(mlx5_lag_dev(dev)));
111 break;
112 default:
113 break;
114 }
115 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode);
116
117 return mlx5_cmd_exec_in(dev, create_lag, in);
118 }
119
120 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev,
121 u8 *ports)
122 {
123 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
124 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
125 int idx0, idx1;
126
127 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0);
128 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1);
129 if (idx0 < 0 || idx1 < 0)
130 return -EINVAL;
131
132 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
133 MLX5_SET(modify_lag_in, in, field_select, 0x1);
134
135 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]);
136 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]);
137
138 return mlx5_cmd_exec_in(dev, modify_lag, in);
139 }
140
141 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
142 {
143 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
144
145 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
146
147 return mlx5_cmd_exec_in(dev, create_vport_lag, in);
148 }
149 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
150
151 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
152 {
153 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
154
155 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
156
157 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
158 }
159 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
160
161 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
162 u8 *ports, int *num_disabled)
163 {
164 int i;
165
166 *num_disabled = 0;
167 mlx5_ldev_for_each(i, 0, ldev)
168 if (!tracker->netdev_state[i].tx_enabled ||
169 !tracker->netdev_state[i].link_up)
170 ports[(*num_disabled)++] = i;
171 }
172
173 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev,
174 u8 *ports, int *num_enabled)
175 {
176 int i;
177
178 *num_enabled = 0;
179 mlx5_ldev_for_each(i, 0, ldev)
180 if (tracker->netdev_state[i].tx_enabled &&
181 tracker->netdev_state[i].link_up)
182 ports[(*num_enabled)++] = i;
183
184 if (*num_enabled == 0)
185 mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled);
186 }
187
188 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev,
189 struct mlx5_lag *ldev,
190 struct lag_tracker *tracker,
191 unsigned long flags)
192 {
193 char buf[MLX5_MAX_PORTS * 10 + 1] = {};
194 u8 enabled_ports[MLX5_MAX_PORTS] = {};
195 int written = 0;
196 int num_enabled;
197 int idx;
198 int err;
199 int i;
200 int j;
201
202 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
203 mlx5_infer_tx_enabled(tracker, ldev, enabled_ports,
204 &num_enabled);
205 for (i = 0; i < num_enabled; i++) {
206 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1);
207 if (err != 3)
208 return;
209 written += err;
210 }
211 buf[written - 2] = 0;
212 mlx5_core_info(dev, "lag map active ports: %s\n", buf);
213 } else {
214 mlx5_ldev_for_each(i, 0, ldev) {
215 for (j = 0; j < ldev->buckets; j++) {
216 idx = i * ldev->buckets + j;
217 err = scnprintf(buf + written, 10,
218 " port %d:%d", i + 1, ldev->v2p_map[idx]);
219 if (err != 9)
220 return;
221 written += err;
222 }
223 }
224 mlx5_core_info(dev, "lag map:%s\n", buf);
225 }
226 }
227
228 static int mlx5_lag_netdev_event(struct notifier_block *this,
229 unsigned long event, void *ptr);
230 static void mlx5_do_bond_work(struct work_struct *work);
231
232 static void mlx5_ldev_free(struct kref *ref)
233 {
234 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
235 struct net *net;
236
237 if (ldev->nb.notifier_call) {
238 net = read_pnet(&ldev->net);
239 unregister_netdevice_notifier_net(net, &ldev->nb);
240 }
241
242 mlx5_lag_mp_cleanup(ldev);
243 cancel_delayed_work_sync(&ldev->bond_work);
244 destroy_workqueue(ldev->wq);
245 mutex_destroy(&ldev->lock);
246 kfree(ldev);
247 }
248
249 static void mlx5_ldev_put(struct mlx5_lag *ldev)
250 {
251 kref_put(&ldev->ref, mlx5_ldev_free);
252 }
253
254 static void mlx5_ldev_get(struct mlx5_lag *ldev)
255 {
256 kref_get(&ldev->ref);
257 }
258
259 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
260 {
261 struct mlx5_lag *ldev;
262 int err;
263
264 ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
265 if (!ldev)
266 return NULL;
267
268 ldev->wq = create_singlethread_workqueue("mlx5_lag");
269 if (!ldev->wq) {
270 kfree(ldev);
271 return NULL;
272 }
273
274 kref_init(&ldev->ref);
275 mutex_init(&ldev->lock);
276 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
277
278 ldev->nb.notifier_call = mlx5_lag_netdev_event;
279 write_pnet(&ldev->net, mlx5_core_net(dev));
280 if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) {
281 ldev->nb.notifier_call = NULL;
282 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n");
283 }
284 ldev->mode = MLX5_LAG_MODE_NONE;
285
286 err = mlx5_lag_mp_init(ldev);
287 if (err)
288 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
289 err);
290
291 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
292 ldev->buckets = 1;
293
294 return ldev;
295 }
296
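/* Map a netdev back to its PF slot index, or -ENOENT if it does not
 * belong to this lag device.
 */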
297 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
298 struct net_device *ndev)
299 {
300 int i;
301
302 mlx5_ldev_for_each(i, 0, ldev)
303 if (ldev->pf[i].netdev == ndev)
304 return i;
305
306 return -ENOENT;
307 }
308
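/* Return the index of the seq-th populated PF slot in ldev->pf,
 * or -ENOENT if there is no such slot.
 */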
309 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq)
310 {
311 int i, num = 0;
312
313 if (!ldev)
314 return -ENOENT;
315
316 mlx5_ldev_for_each(i, 0, ldev) {
317 if (num == seq)
318 return i;
319 num++;
320 }
321 return -ENOENT;
322 }
323
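/* Count the core devices currently attached to this lag device. */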
324 int mlx5_lag_num_devs(struct mlx5_lag *ldev)
325 {
326 int i, num = 0;
327
328 if (!ldev)
329 return 0;
330
331 mlx5_ldev_for_each(i, 0, ldev) {
332 (void)i;
333 num++;
334 }
335 return num;
336 }
337
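/* Count the netdevs currently registered with this lag device. */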
338 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev)
339 {
340 int i, num = 0;
341
342 if (!ldev)
343 return 0;
344
345 mlx5_ldev_for_each(i, 0, ldev)
346 if (ldev->pf[i].netdev)
347 num++;
348 return num;
349 }
350
351 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev)
352 {
353 return ldev->mode == MLX5_LAG_MODE_ROCE;
354 }
355
356 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev)
357 {
358 return ldev->mode == MLX5_LAG_MODE_SRIOV;
359 }
360
361 /* Create a mapping between steering slots and active ports.
362  * As we have ldev->buckets slots per port, first assume the native
363  * mapping should be used.
364  * If there are ports that are disabled, fill the relevant slots
365  * with a mapping that points to active ports.
366 */
367 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker,
368 struct mlx5_lag *ldev,
369 u8 buckets,
370 u8 *ports)
371 {
372 int disabled[MLX5_MAX_PORTS] = {};
373 int enabled[MLX5_MAX_PORTS] = {};
374 int disabled_ports_num = 0;
375 int enabled_ports_num = 0;
376 int idx;
377 u32 rand;
378 int i;
379 int j;
380
381 mlx5_ldev_for_each(i, 0, ldev) {
382 if (tracker->netdev_state[i].tx_enabled &&
383 tracker->netdev_state[i].link_up)
384 enabled[enabled_ports_num++] = i;
385 else
386 disabled[disabled_ports_num++] = i;
387 }
388
389 /* Use native mapping by default where each port's buckets
390 	 * point to the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc
391 */
392 mlx5_ldev_for_each(i, 0, ldev) {
393 for (j = 0; j < buckets; j++) {
394 idx = i * buckets + j;
395 ports[idx] = i + 1;
396 }
397 }
398
399 /* If all ports are disabled/enabled keep native mapping */
400 if (enabled_ports_num == ldev->ports ||
401 disabled_ports_num == ldev->ports)
402 return;
403
404 /* Go over the disabled ports and for each assign a random active port */
405 for (i = 0; i < disabled_ports_num; i++) {
406 for (j = 0; j < buckets; j++) {
407 get_random_bytes(&rand, 4);
408 ports[disabled[i] * buckets + j] = enabled[rand % enabled_ports_num] + 1;
409 }
410 }
411 }
412
413 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev)
414 {
415 int i;
416
417 mlx5_ldev_for_each(i, 0, ldev)
418 if (ldev->pf[i].has_drop)
419 return true;
420 return false;
421 }
422
423 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev)
424 {
425 int i;
426
427 mlx5_ldev_for_each(i, 0, ldev) {
428 if (!ldev->pf[i].has_drop)
429 continue;
430
431 mlx5_esw_acl_ingress_vport_drop_rule_destroy(ldev->pf[i].dev->priv.eswitch,
432 MLX5_VPORT_UPLINK);
433 ldev->pf[i].has_drop = false;
434 }
435 }
436
437 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev,
438 struct lag_tracker *tracker)
439 {
440 u8 disabled_ports[MLX5_MAX_PORTS] = {};
441 struct mlx5_core_dev *dev;
442 int disabled_index;
443 int num_disabled;
444 int err;
445 int i;
446
447 /* First delete the current drop rule so there won't be any dropped
448 * packets
449 */
450 mlx5_lag_drop_rule_cleanup(ldev);
451
452 if (!ldev->tracker.has_inactive)
453 return;
454
455 mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled);
456
457 for (i = 0; i < num_disabled; i++) {
458 disabled_index = disabled_ports[i];
459 dev = ldev->pf[disabled_index].dev;
460 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch,
461 MLX5_VPORT_UPLINK);
462 if (!err)
463 ldev->pf[disabled_index].has_drop = true;
464 else
465 mlx5_core_err(dev,
466 				      "Failed to create lag drop rule, error: %d\n", err);
467 }
468 }
469
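/* Update the active_port bitmask of an existing LAG via MODIFY_LAG
 * (field_select 0x2 selects the active_port field).
 */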
470 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports)
471 {
472 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
473 void *lag_ctx;
474
475 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
476
477 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
478 MLX5_SET(modify_lag_in, in, field_select, 0x2);
479
480 MLX5_SET(lagc, lag_ctx, active_port, ports);
481
482 return mlx5_cmd_exec_in(dev, modify_lag, in);
483 }
484
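/* Apply a new port affinity mapping: via the port selection flow table
 * in hash-based mode (optionally refreshing the active port bitmask),
 * or via the MODIFY_LAG command otherwise.
 */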
485 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports)
486 {
487 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
488 struct mlx5_core_dev *dev0;
489 u8 active_ports;
490 int ret;
491
492 if (idx < 0)
493 return -EINVAL;
494
495 dev0 = ldev->pf[idx].dev;
496 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) {
497 ret = mlx5_lag_port_sel_modify(ldev, ports);
498 if (ret ||
499 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass))
500 return ret;
501
502 active_ports = lag_active_port_bits(ldev);
503
504 return mlx5_cmd_modify_active_port(dev0, active_ports);
505 }
506 return mlx5_cmd_modify_lag(dev0, ldev, ports);
507 }
508
509 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev)
510 {
511 struct net_device *ndev = NULL;
512 struct mlx5_lag *ldev;
513 unsigned long flags;
514 int i, last_idx;
515
516 spin_lock_irqsave(&lag_lock, flags);
517 ldev = mlx5_lag_dev(dev);
518
519 if (!ldev)
520 goto unlock;
521
522 mlx5_ldev_for_each(i, 0, ldev)
523 if (ldev->tracker.netdev_state[i].tx_enabled)
524 ndev = ldev->pf[i].netdev;
525 if (!ndev) {
526 last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1);
527 if (last_idx < 0)
528 goto unlock;
529 ndev = ldev->pf[last_idx].netdev;
530 }
531
532 dev_hold(ndev);
533
534 unlock:
535 spin_unlock_irqrestore(&lag_lock, flags);
536
537 return ndev;
538 }
539
540 void mlx5_modify_lag(struct mlx5_lag *ldev,
541 struct lag_tracker *tracker)
542 {
543 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
544 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {};
545 struct mlx5_core_dev *dev0;
546 int idx;
547 int err;
548 int i;
549 int j;
550
551 if (first_idx < 0)
552 return;
553
554 dev0 = ldev->pf[first_idx].dev;
555 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports);
556
557 mlx5_ldev_for_each(i, 0, ldev) {
558 for (j = 0; j < ldev->buckets; j++) {
559 idx = i * ldev->buckets + j;
560 if (ports[idx] == ldev->v2p_map[idx])
561 continue;
562 err = _mlx5_modify_lag(ldev, ports);
563 if (err) {
564 mlx5_core_err(dev0,
565 "Failed to modify LAG (%d)\n",
566 err);
567 return;
568 }
569 memcpy(ldev->v2p_map, ports, sizeof(ports));
570
571 mlx5_lag_print_mapping(dev0, ldev, tracker,
572 ldev->mode_flags);
573 break;
574 }
575 }
576
577 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
578 struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0);
579
580 		if (ldev->mode != MLX5_LAG_MODE_ROCE)
581 			mlx5_lag_drop_rule_setup(ldev, tracker);
582 		/* Only SR-IOV and RoCE LAG should have tracker->tx_type set,
583 		 * so there is no need to check the mode here.
584 		 */
585 blocking_notifier_call_chain(&dev0->priv.lag_nh,
586 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
587 ndev);
588 dev_put(ndev);
589 }
590 }
591
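/* Choose between hash-based (port selection flow table) and queue
 * affinity steering for this LAG mode. More than two ports require the
 * port_select_flow_table capability.
 */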
592 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev,
593 enum mlx5_lag_mode mode,
594 unsigned long *flags)
595 {
596 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
597 struct mlx5_core_dev *dev0;
598
599 if (first_idx < 0)
600 return -EINVAL;
601
602 if (mode == MLX5_LAG_MODE_MPESW ||
603 mode == MLX5_LAG_MODE_MULTIPATH)
604 return 0;
605
606 dev0 = ldev->pf[first_idx].dev;
607
608 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) {
609 if (ldev->ports > 2)
610 return -EINVAL;
611 return 0;
612 }
613
614 if (ldev->ports > 2)
615 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS;
616
617 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags);
618
619 return 0;
620 }
621
622 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode,
623 struct lag_tracker *tracker, bool shared_fdb,
624 unsigned long *flags)
625 {
626 *flags = 0;
627 if (shared_fdb) {
628 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
629 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
630 }
631
632 if (mode == MLX5_LAG_MODE_MPESW)
633 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
634
635 return mlx5_lag_set_port_sel_mode(ldev, mode, flags);
636 }
637
638 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
639 {
640 int port_sel_mode = get_port_sel_mode(mode, flags);
641
642 switch (port_sel_mode) {
643 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
644 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash";
645 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw";
646 default: return "invalid";
647 }
648 }
649
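/* Pair each secondary eswitch with the master eswitch so all ports share
 * a single FDB; unwind the already-paired eswitches on failure.
 */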
650 static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev)
651 {
652 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
653 struct mlx5_eswitch *master_esw;
654 struct mlx5_core_dev *dev0;
655 int i, j;
656 int err;
657
658 if (first_idx < 0)
659 return -EINVAL;
660
661 dev0 = ldev->pf[first_idx].dev;
662 master_esw = dev0->priv.eswitch;
663 mlx5_ldev_for_each(i, first_idx + 1, ldev) {
664 struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch;
665
666 err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw,
667 slave_esw, ldev->ports);
668 if (err)
669 goto err;
670 }
671 return 0;
672 err:
673 mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev)
674 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
675 ldev->pf[j].dev->priv.eswitch);
676 return err;
677 }
678
679 static int mlx5_create_lag(struct mlx5_lag *ldev,
680 struct lag_tracker *tracker,
681 enum mlx5_lag_mode mode,
682 unsigned long flags)
683 {
684 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
685 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
686 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
687 struct mlx5_core_dev *dev0;
688 int err;
689
690 if (first_idx < 0)
691 return -EINVAL;
692
693 dev0 = ldev->pf[first_idx].dev;
694 if (tracker)
695 mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
696 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
697 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
698
699 err = mlx5_cmd_create_lag(dev0, ldev, mode, flags);
700 if (err) {
701 mlx5_core_err(dev0,
702 "Failed to create LAG (%d)\n",
703 err);
704 return err;
705 }
706
707 if (shared_fdb) {
708 err = mlx5_lag_create_single_fdb(ldev);
709 if (err)
710 mlx5_core_err(dev0, "Can't enable single FDB mode\n");
711 else
712 mlx5_core_info(dev0, "Operation mode is single FDB\n");
713 }
714
715 if (err) {
716 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
717 if (mlx5_cmd_exec_in(dev0, destroy_lag, in))
718 mlx5_core_err(dev0,
719 "Failed to deactivate RoCE LAG; driver restart required\n");
720 }
721 BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh);
722
723 return err;
724 }
725
726 int mlx5_activate_lag(struct mlx5_lag *ldev,
727 struct lag_tracker *tracker,
728 enum mlx5_lag_mode mode,
729 bool shared_fdb)
730 {
731 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
732 bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
733 struct mlx5_core_dev *dev0;
734 unsigned long flags = 0;
735 int err;
736
737 if (first_idx < 0)
738 return -EINVAL;
739
740 dev0 = ldev->pf[first_idx].dev;
741 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags);
742 if (err)
743 return err;
744
745 if (mode != MLX5_LAG_MODE_MPESW) {
746 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map);
747 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
748 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type,
749 ldev->v2p_map);
750 if (err) {
751 mlx5_core_err(dev0,
752 "Failed to create LAG port selection(%d)\n",
753 err);
754 return err;
755 }
756 }
757 }
758
759 err = mlx5_create_lag(ldev, tracker, mode, flags);
760 if (err) {
761 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
762 mlx5_lag_port_sel_destroy(ldev);
763 if (roce_lag)
764 mlx5_core_err(dev0,
765 "Failed to activate RoCE LAG\n");
766 else
767 mlx5_core_err(dev0,
768 "Failed to activate VF LAG\n"
769 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
770 return err;
771 }
772
773 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP &&
774 !roce_lag)
775 mlx5_lag_drop_rule_setup(ldev, tracker);
776
777 ldev->mode = mode;
778 ldev->mode_flags = flags;
779 return 0;
780 }
781
782 int mlx5_deactivate_lag(struct mlx5_lag *ldev)
783 {
784 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
785 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
786 bool roce_lag = __mlx5_lag_is_roce(ldev);
787 unsigned long flags = ldev->mode_flags;
788 struct mlx5_eswitch *master_esw;
789 struct mlx5_core_dev *dev0;
790 int err;
791 int i;
792
793 if (first_idx < 0)
794 return -EINVAL;
795
796 dev0 = ldev->pf[first_idx].dev;
797 master_esw = dev0->priv.eswitch;
798 ldev->mode = MLX5_LAG_MODE_NONE;
799 ldev->mode_flags = 0;
800 mlx5_lag_mp_reset(ldev);
801
802 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) {
803 mlx5_ldev_for_each(i, first_idx + 1, ldev)
804 mlx5_eswitch_offloads_single_fdb_del_one(master_esw,
805 ldev->pf[i].dev->priv.eswitch);
806 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
807 }
808
809 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
810 err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
811 if (err) {
812 if (roce_lag) {
813 mlx5_core_err(dev0,
814 "Failed to deactivate RoCE LAG; driver restart required\n");
815 } else {
816 mlx5_core_err(dev0,
817 "Failed to deactivate VF LAG; driver restart required\n"
818 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n");
819 }
820 return err;
821 }
822
823 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
824 mlx5_lag_port_sel_destroy(ldev);
825 ldev->buckets = 1;
826 }
827 if (mlx5_lag_has_drop_rule(ldev))
828 mlx5_lag_drop_rule_cleanup(ldev);
829
830 return 0;
831 }
832
833 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
834 {
835 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
836 #ifdef CONFIG_MLX5_ESWITCH
837 struct mlx5_core_dev *dev;
838 u8 mode;
839 #endif
840 bool roce_support;
841 int i;
842
843 if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports)
844 return false;
845
846 #ifdef CONFIG_MLX5_ESWITCH
847 mlx5_ldev_for_each(i, 0, ldev) {
848 dev = ldev->pf[i].dev;
849 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev))
850 return false;
851 }
852
853 dev = ldev->pf[first_idx].dev;
854 mode = mlx5_eswitch_mode(dev);
855 mlx5_ldev_for_each(i, 0, ldev)
856 if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode)
857 return false;
858
859 #else
860 mlx5_ldev_for_each(i, 0, ldev)
861 if (mlx5_sriov_is_enabled(ldev->pf[i].dev))
862 return false;
863 #endif
864 roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev);
865 mlx5_ldev_for_each(i, first_idx + 1, ldev)
866 if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support)
867 return false;
868
869 return true;
870 }
871
872 void mlx5_lag_add_devices(struct mlx5_lag *ldev)
873 {
874 int i;
875
876 mlx5_ldev_for_each(i, 0, ldev) {
877 if (ldev->pf[i].dev->priv.flags &
878 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
879 continue;
880
881 ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
882 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
883 }
884 }
885
886 void mlx5_lag_remove_devices(struct mlx5_lag *ldev)
887 {
888 int i;
889
890 mlx5_ldev_for_each(i, 0, ldev) {
891 if (ldev->pf[i].dev->priv.flags &
892 MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)
893 continue;
894
895 ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
896 mlx5_rescan_drivers_locked(ldev->pf[i].dev);
897 }
898 }
899
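/* Disable an active LAG: detach IB devices or RoCE state as the current
 * mode requires, deactivate the LAG in firmware, then re-enable the
 * per-port IB devices.
 */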
900 void mlx5_disable_lag(struct mlx5_lag *ldev)
901 {
902 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
903 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
904 struct mlx5_core_dev *dev0;
905 bool roce_lag;
906 int err;
907 int i;
908
909 if (idx < 0)
910 return;
911
912 dev0 = ldev->pf[idx].dev;
913 roce_lag = __mlx5_lag_is_roce(ldev);
914
915 if (shared_fdb) {
916 mlx5_lag_remove_devices(ldev);
917 } else if (roce_lag) {
918 if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) {
919 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
920 mlx5_rescan_drivers_locked(dev0);
921 }
922 mlx5_ldev_for_each(i, idx + 1, ldev)
923 mlx5_nic_vport_disable_roce(ldev->pf[i].dev);
924 }
925
926 err = mlx5_deactivate_lag(ldev);
927 if (err)
928 return;
929
930 if (shared_fdb || roce_lag)
931 mlx5_lag_add_devices(ldev);
932
933 if (shared_fdb)
934 mlx5_ldev_for_each(i, 0, ldev)
935 if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV))
936 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
937 }
938
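/* Shared FDB requires every device to be in switchdev mode with vport
 * match metadata enabled, the relevant FW/eswitch capabilities, and a
 * devcom peer for each of the other LAG ports.
 */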
939 bool mlx5_lag_shared_fdb_supported(struct mlx5_lag *ldev)
940 {
941 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
942 struct mlx5_core_dev *dev;
943 int i;
944
945 if (idx < 0)
946 return false;
947
948 mlx5_ldev_for_each(i, idx + 1, ldev) {
949 dev = ldev->pf[i].dev;
950 if (is_mdev_switchdev_mode(dev) &&
951 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
952 MLX5_CAP_GEN(dev, lag_native_fdb_selection) &&
953 MLX5_CAP_ESW(dev, root_ft_on_other_esw) &&
954 mlx5_eswitch_get_npeers(dev->priv.eswitch) ==
955 MLX5_CAP_GEN(dev, num_lag_ports) - 1)
956 continue;
957 return false;
958 }
959
960 dev = ldev->pf[idx].dev;
961 if (is_mdev_switchdev_mode(dev) &&
962 mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) &&
963 mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) &&
964 MLX5_CAP_ESW(dev, esw_shared_ingress_acl) &&
965 mlx5_eswitch_get_npeers(dev->priv.eswitch) == MLX5_CAP_GEN(dev, num_lag_ports) - 1)
966 return true;
967
968 return false;
969 }
970
971 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev)
972 {
973 bool roce_lag = true;
974 int i;
975
976 mlx5_ldev_for_each(i, 0, ldev)
977 roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev);
978
979 #ifdef CONFIG_MLX5_ESWITCH
980 mlx5_ldev_for_each(i, 0, ldev)
981 roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev);
982 #endif
983
984 return roce_lag;
985 }
986
987 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond)
988 {
989 return do_bond && __mlx5_lag_is_active(ldev) &&
990 ldev->mode != MLX5_LAG_MODE_MPESW;
991 }
992
993 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond)
994 {
995 return !do_bond && __mlx5_lag_is_active(ldev) &&
996 ldev->mode != MLX5_LAG_MODE_MPESW;
997 }
998
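/* Re-evaluate the tracked bond state and activate, modify or disable the
 * hardware LAG accordingly. Runs from the delayed bond work.
 */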
999 static void mlx5_do_bond(struct mlx5_lag *ldev)
1000 {
1001 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1002 struct lag_tracker tracker = { };
1003 struct mlx5_core_dev *dev0;
1004 struct net_device *ndev;
1005 bool do_bond, roce_lag;
1006 int err;
1007 int i;
1008
1009 if (idx < 0)
1010 return;
1011
1012 dev0 = ldev->pf[idx].dev;
1013 if (!mlx5_lag_is_ready(ldev)) {
1014 do_bond = false;
1015 } else {
1016 /* VF LAG is in multipath mode, ignore bond change requests */
1017 if (mlx5_lag_is_multipath(dev0))
1018 return;
1019
1020 tracker = ldev->tracker;
1021
1022 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
1023 }
1024
1025 if (do_bond && !__mlx5_lag_is_active(ldev)) {
1026 bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev);
1027
1028 roce_lag = mlx5_lag_is_roce_lag(ldev);
1029
1030 if (shared_fdb || roce_lag)
1031 mlx5_lag_remove_devices(ldev);
1032
1033 err = mlx5_activate_lag(ldev, &tracker,
1034 roce_lag ? MLX5_LAG_MODE_ROCE :
1035 MLX5_LAG_MODE_SRIOV,
1036 shared_fdb);
1037 if (err) {
1038 if (shared_fdb || roce_lag)
1039 mlx5_lag_add_devices(ldev);
1040 if (shared_fdb) {
1041 mlx5_ldev_for_each(i, 0, ldev)
1042 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1043 }
1044
1045 return;
1046 } else if (roce_lag) {
1047 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1048 mlx5_rescan_drivers_locked(dev0);
1049 mlx5_ldev_for_each(i, idx + 1, ldev) {
1050 if (mlx5_get_roce_state(ldev->pf[i].dev))
1051 mlx5_nic_vport_enable_roce(ldev->pf[i].dev);
1052 }
1053 } else if (shared_fdb) {
1054 int i;
1055
1056 dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1057 mlx5_rescan_drivers_locked(dev0);
1058
1059 mlx5_ldev_for_each(i, 0, ldev) {
1060 err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1061 if (err)
1062 break;
1063 }
1064
1065 if (err) {
1066 dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV;
1067 mlx5_rescan_drivers_locked(dev0);
1068 mlx5_deactivate_lag(ldev);
1069 mlx5_lag_add_devices(ldev);
1070 mlx5_ldev_for_each(i, 0, ldev)
1071 mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch);
1072 mlx5_core_err(dev0, "Failed to enable lag\n");
1073 return;
1074 }
1075 }
1076 if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) {
1077 ndev = mlx5_lag_active_backup_get_netdev(dev0);
1078 			/* Only SR-IOV and RoCE LAG should have tracker->tx_type
1079 			 * set, so there is no need to check the mode here.
1080 			 */
1081 blocking_notifier_call_chain(&dev0->priv.lag_nh,
1082 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE,
1083 ndev);
1084 dev_put(ndev);
1085 }
1086 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) {
1087 mlx5_modify_lag(ldev, &tracker);
1088 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) {
1089 mlx5_disable_lag(ldev);
1090 }
1091 }
1092
1093 /* The last mdev to unregister will destroy the workqueue before removing the
1094 * devcom component, and as all the mdevs use the same devcom component we are
1095 * guaranteed that the devcom is valid while the calling work is running.
1096 */
1097 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev)
1098 {
1099 struct mlx5_devcom_comp_dev *devcom = NULL;
1100 int i;
1101
1102 mutex_lock(&ldev->lock);
1103 i = mlx5_get_next_ldev_func(ldev, 0);
1104 if (i < MLX5_MAX_PORTS)
1105 devcom = ldev->pf[i].dev->priv.hca_devcom_comp;
1106 mutex_unlock(&ldev->lock);
1107 return devcom;
1108 }
1109
1110 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay)
1111 {
1112 queue_delayed_work(ldev->wq, &ldev->bond_work, delay);
1113 }
1114
1115 static void mlx5_do_bond_work(struct work_struct *work)
1116 {
1117 struct delayed_work *delayed_work = to_delayed_work(work);
1118 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag,
1119 bond_work);
1120 struct mlx5_devcom_comp_dev *devcom;
1121 int status;
1122
1123 devcom = mlx5_lag_get_devcom_comp(ldev);
1124 if (!devcom)
1125 return;
1126
1127 status = mlx5_devcom_comp_trylock(devcom);
1128 if (!status) {
1129 mlx5_queue_bond_work(ldev, HZ);
1130 return;
1131 }
1132
1133 mutex_lock(&ldev->lock);
1134 if (ldev->mode_changes_in_progress) {
1135 mutex_unlock(&ldev->lock);
1136 mlx5_devcom_comp_unlock(devcom);
1137 mlx5_queue_bond_work(ldev, HZ);
1138 return;
1139 }
1140
1141 mlx5_do_bond(ldev);
1142 mutex_unlock(&ldev->lock);
1143 mlx5_devcom_comp_unlock(devcom);
1144 }
1145
1146 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev,
1147 struct lag_tracker *tracker,
1148 struct netdev_notifier_changeupper_info *info)
1149 {
1150 struct net_device *upper = info->upper_dev, *ndev_tmp;
1151 struct netdev_lag_upper_info *lag_upper_info = NULL;
1152 bool is_bonded, is_in_lag, mode_supported;
1153 bool has_inactive = 0;
1154 struct slave *slave;
1155 u8 bond_status = 0;
1156 int num_slaves = 0;
1157 int changed = 0;
1158 int i, idx = -1;
1159
1160 if (!netif_is_lag_master(upper))
1161 return 0;
1162
1163 if (info->linking)
1164 lag_upper_info = info->upper_info;
1165
1166 /* The event may still be of interest if the slave does not belong to
1167 * us, but is enslaved to a master which has one or more of our netdevs
1168 * as slaves (e.g., if a new slave is added to a master that bonds two
1169 * of our netdevs, we should unbond).
1170 */
1171 rcu_read_lock();
1172 for_each_netdev_in_bond_rcu(upper, ndev_tmp) {
1173 mlx5_ldev_for_each(i, 0, ldev) {
1174 if (ldev->pf[i].netdev == ndev_tmp) {
1175 idx++;
1176 break;
1177 }
1178 }
1179 if (i < MLX5_MAX_PORTS) {
1180 slave = bond_slave_get_rcu(ndev_tmp);
1181 if (slave)
1182 has_inactive |= bond_is_slave_inactive(slave);
1183 bond_status |= (1 << idx);
1184 }
1185
1186 num_slaves++;
1187 }
1188 rcu_read_unlock();
1189
1190 /* None of this lagdev's netdevs are slaves of this master. */
1191 if (!(bond_status & GENMASK(ldev->ports - 1, 0)))
1192 return 0;
1193
1194 if (lag_upper_info) {
1195 tracker->tx_type = lag_upper_info->tx_type;
1196 tracker->hash_type = lag_upper_info->hash_type;
1197 }
1198
1199 tracker->has_inactive = has_inactive;
1200 /* Determine bonding status:
1201 	 * A device is considered bonded if all of its physical ports are
1202 	 * slaves of the same lag master, and only them.
1203 */
1204 is_in_lag = num_slaves == ldev->ports &&
1205 bond_status == GENMASK(ldev->ports - 1, 0);
1206
1207 /* Lag mode must be activebackup or hash. */
1208 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP ||
1209 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH;
1210
1211 is_bonded = is_in_lag && mode_supported;
1212 if (tracker->is_bonded != is_bonded) {
1213 tracker->is_bonded = is_bonded;
1214 changed = 1;
1215 }
1216
1217 if (!is_in_lag)
1218 return changed;
1219
1220 if (!mlx5_lag_is_ready(ldev))
1221 NL_SET_ERR_MSG_MOD(info->info.extack,
1222 "Can't activate LAG offload, PF is configured with more than 64 VFs");
1223 else if (!mode_supported)
1224 NL_SET_ERR_MSG_MOD(info->info.extack,
1225 "Can't activate LAG offload, TX type isn't supported");
1226
1227 return changed;
1228 }
1229
1230 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev,
1231 struct lag_tracker *tracker,
1232 struct net_device *ndev,
1233 struct netdev_notifier_changelowerstate_info *info)
1234 {
1235 struct netdev_lag_lower_state_info *lag_lower_info;
1236 int idx;
1237
1238 if (!netif_is_lag_port(ndev))
1239 return 0;
1240
1241 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev);
1242 if (idx < 0)
1243 return 0;
1244
1245 /* This information is used to determine virtual to physical
1246 * port mapping.
1247 */
1248 lag_lower_info = info->lower_state_info;
1249 if (!lag_lower_info)
1250 return 0;
1251
1252 tracker->netdev_state[idx] = *lag_lower_info;
1253
1254 return 1;
1255 }
1256
1257 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev,
1258 struct lag_tracker *tracker,
1259 struct net_device *ndev)
1260 {
1261 struct net_device *ndev_tmp;
1262 struct slave *slave;
1263 bool has_inactive = 0;
1264 int idx;
1265
1266 if (!netif_is_lag_master(ndev))
1267 return 0;
1268
1269 rcu_read_lock();
1270 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) {
1271 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp);
1272 if (idx < 0)
1273 continue;
1274
1275 slave = bond_slave_get_rcu(ndev_tmp);
1276 if (slave)
1277 has_inactive |= bond_is_slave_inactive(slave);
1278 }
1279 rcu_read_unlock();
1280
1281 if (tracker->has_inactive == has_inactive)
1282 return 0;
1283
1284 tracker->has_inactive = has_inactive;
1285
1286 return 1;
1287 }
1288
1289 /* this handler is always registered to netdev events */
1290 static int mlx5_lag_netdev_event(struct notifier_block *this,
1291 unsigned long event, void *ptr)
1292 {
1293 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
1294 struct lag_tracker tracker;
1295 struct mlx5_lag *ldev;
1296 int changed = 0;
1297
1298 if (event != NETDEV_CHANGEUPPER &&
1299 event != NETDEV_CHANGELOWERSTATE &&
1300 event != NETDEV_CHANGEINFODATA)
1301 return NOTIFY_DONE;
1302
1303 ldev = container_of(this, struct mlx5_lag, nb);
1304
1305 tracker = ldev->tracker;
1306
1307 switch (event) {
1308 case NETDEV_CHANGEUPPER:
1309 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr);
1310 break;
1311 case NETDEV_CHANGELOWERSTATE:
1312 changed = mlx5_handle_changelowerstate_event(ldev, &tracker,
1313 ndev, ptr);
1314 break;
1315 case NETDEV_CHANGEINFODATA:
1316 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev);
1317 break;
1318 }
1319
1320 ldev->tracker = tracker;
1321
1322 if (changed)
1323 mlx5_queue_bond_work(ldev, 0);
1324
1325 return NOTIFY_DONE;
1326 }
1327
1328 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev,
1329 struct mlx5_core_dev *dev,
1330 struct net_device *netdev)
1331 {
1332 unsigned int fn = mlx5_get_dev_index(dev);
1333 unsigned long flags;
1334
1335 spin_lock_irqsave(&lag_lock, flags);
1336 ldev->pf[fn].netdev = netdev;
1337 ldev->tracker.netdev_state[fn].link_up = 0;
1338 ldev->tracker.netdev_state[fn].tx_enabled = 0;
1339 spin_unlock_irqrestore(&lag_lock, flags);
1340 }
1341
1342 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev,
1343 struct net_device *netdev)
1344 {
1345 unsigned long flags;
1346 int i;
1347
1348 spin_lock_irqsave(&lag_lock, flags);
1349 mlx5_ldev_for_each(i, 0, ldev) {
1350 if (ldev->pf[i].netdev == netdev) {
1351 ldev->pf[i].netdev = NULL;
1352 break;
1353 }
1354 }
1355 spin_unlock_irqrestore(&lag_lock, flags);
1356 }
1357
1358 static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
1359 struct mlx5_core_dev *dev)
1360 {
1361 unsigned int fn = mlx5_get_dev_index(dev);
1362
1363 ldev->pf[fn].dev = dev;
1364 dev->priv.lag = ldev;
1365 }
1366
1367 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
1368 struct mlx5_core_dev *dev)
1369 {
1370 int fn;
1371
1372 fn = mlx5_get_dev_index(dev);
1373 if (ldev->pf[fn].dev != dev)
1374 return;
1375
1376 ldev->pf[fn].dev = NULL;
1377 dev->priv.lag = NULL;
1378 }
1379
1380 /* Must be called with HCA devcom component lock held */
1381 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev)
1382 {
1383 struct mlx5_devcom_comp_dev *pos = NULL;
1384 struct mlx5_lag *ldev = NULL;
1385 struct mlx5_core_dev *tmp_dev;
1386
1387 tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos);
1388 if (tmp_dev)
1389 ldev = mlx5_lag_dev(tmp_dev);
1390
1391 if (!ldev) {
1392 ldev = mlx5_lag_dev_alloc(dev);
1393 if (!ldev) {
1394 mlx5_core_err(dev, "Failed to alloc lag dev\n");
1395 return 0;
1396 }
1397 mlx5_ldev_add_mdev(ldev, dev);
1398 return 0;
1399 }
1400
1401 mutex_lock(&ldev->lock);
1402 if (ldev->mode_changes_in_progress) {
1403 mutex_unlock(&ldev->lock);
1404 return -EAGAIN;
1405 }
1406 mlx5_ldev_get(ldev);
1407 mlx5_ldev_add_mdev(ldev, dev);
1408 mutex_unlock(&ldev->lock);
1409
1410 return 0;
1411 }
1412
1413 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev)
1414 {
1415 mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp);
1416 }
1417
1418 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev)
1419 {
1420 struct mlx5_devcom_match_attr attr = {
1421 .flags = MLX5_DEVCOM_MATCH_FLAGS_NS,
1422 .net = mlx5_core_net(dev),
1423 };
1424 u8 len __always_unused;
1425
1426 mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len);
1427
1428 	/* This component is used to sync adding core_dev to lag_dev and to sync
1429 * changes of mlx5_adev_devices between LAG layer and other layers.
1430 */
1431 dev->priv.hca_devcom_comp =
1432 mlx5_devcom_register_component(dev->priv.devc,
1433 MLX5_DEVCOM_HCA_PORTS,
1434 &attr, NULL, dev);
1435 if (!dev->priv.hca_devcom_comp) {
1436 mlx5_core_err(dev,
1437 			      "Failed to register devcom HCA component\n");
1438 return -EINVAL;
1439 }
1440
1441 return 0;
1442 }
1443
1444 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev)
1445 {
1446 struct mlx5_lag *ldev;
1447
1448 ldev = mlx5_lag_dev(dev);
1449 if (!ldev)
1450 return;
1451
1452 /* mdev is being removed, might as well remove debugfs
1453 * as early as possible.
1454 */
1455 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs);
1456 recheck:
1457 mutex_lock(&ldev->lock);
1458 if (ldev->mode_changes_in_progress) {
1459 mutex_unlock(&ldev->lock);
1460 msleep(100);
1461 goto recheck;
1462 }
1463 mlx5_ldev_remove_mdev(ldev, dev);
1464 mutex_unlock(&ldev->lock);
1465 mlx5_lag_unregister_hca_devcom_comp(dev);
1466 mlx5_ldev_put(ldev);
1467 }
1468
1469 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev)
1470 {
1471 int err;
1472
1473 if (!mlx5_lag_is_supported(dev))
1474 return;
1475
1476 if (mlx5_lag_register_hca_devcom_comp(dev))
1477 return;
1478
1479 recheck:
1480 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1481 err = __mlx5_lag_dev_add_mdev(dev);
1482 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1483
1484 if (err) {
1485 msleep(100);
1486 goto recheck;
1487 }
1488 mlx5_ldev_add_debugfs(dev);
1489 }
1490
1491 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev,
1492 struct net_device *netdev)
1493 {
1494 struct mlx5_lag *ldev;
1495 bool lag_is_active;
1496
1497 ldev = mlx5_lag_dev(dev);
1498 if (!ldev)
1499 return;
1500
1501 mutex_lock(&ldev->lock);
1502 mlx5_ldev_remove_netdev(ldev, netdev);
1503 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1504
1505 lag_is_active = __mlx5_lag_is_active(ldev);
1506 mutex_unlock(&ldev->lock);
1507
1508 if (lag_is_active)
1509 mlx5_queue_bond_work(ldev, 0);
1510 }
1511
1512 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev,
1513 struct net_device *netdev)
1514 {
1515 struct mlx5_lag *ldev;
1516 int num = 0;
1517
1518 ldev = mlx5_lag_dev(dev);
1519 if (!ldev)
1520 return;
1521
1522 mutex_lock(&ldev->lock);
1523 mlx5_ldev_add_netdev(ldev, dev, netdev);
1524 num = mlx5_lag_num_netdevs(ldev);
1525 if (num >= ldev->ports)
1526 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
1527 mutex_unlock(&ldev->lock);
1528 mlx5_queue_bond_work(ldev, 0);
1529 }
1530
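/* Return the highest populated PF index in [end_idx, start_idx],
 * scanning downwards, or -1 if none is found.
 */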
1531 int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx)
1532 {
1533 int i;
1534
1535 for (i = start_idx; i >= end_idx; i--)
1536 if (ldev->pf[i].dev)
1537 return i;
1538 return -1;
1539 }
1540
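/* Return the first populated PF index at or after start_idx,
 * or MLX5_MAX_PORTS if none is found.
 */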
1541 int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx)
1542 {
1543 int i;
1544
1545 for (i = start_idx; i < MLX5_MAX_PORTS; i++)
1546 if (ldev->pf[i].dev)
1547 return i;
1548 return MLX5_MAX_PORTS;
1549 }
1550
1551 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
1552 {
1553 struct mlx5_lag *ldev;
1554 unsigned long flags;
1555 bool res;
1556
1557 spin_lock_irqsave(&lag_lock, flags);
1558 ldev = mlx5_lag_dev(dev);
1559 res = ldev && __mlx5_lag_is_roce(ldev);
1560 spin_unlock_irqrestore(&lag_lock, flags);
1561
1562 return res;
1563 }
1564 EXPORT_SYMBOL(mlx5_lag_is_roce);
1565
1566 bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
1567 {
1568 struct mlx5_lag *ldev;
1569 unsigned long flags;
1570 bool res;
1571
1572 spin_lock_irqsave(&lag_lock, flags);
1573 ldev = mlx5_lag_dev(dev);
1574 res = ldev && __mlx5_lag_is_active(ldev);
1575 spin_unlock_irqrestore(&lag_lock, flags);
1576
1577 return res;
1578 }
1579 EXPORT_SYMBOL(mlx5_lag_is_active);
1580
1581 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev)
1582 {
1583 struct mlx5_lag *ldev;
1584 unsigned long flags;
1585 bool res = 0;
1586
1587 spin_lock_irqsave(&lag_lock, flags);
1588 ldev = mlx5_lag_dev(dev);
1589 if (ldev)
1590 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags);
1591 spin_unlock_irqrestore(&lag_lock, flags);
1592
1593 return res;
1594 }
1595 EXPORT_SYMBOL(mlx5_lag_mode_is_hash);
1596
1597 bool mlx5_lag_is_master(struct mlx5_core_dev *dev)
1598 {
1599 struct mlx5_lag *ldev;
1600 unsigned long flags;
1601 bool res = false;
1602 int idx;
1603
1604 spin_lock_irqsave(&lag_lock, flags);
1605 ldev = mlx5_lag_dev(dev);
1606 idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1);
1607 res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev;
1608 spin_unlock_irqrestore(&lag_lock, flags);
1609
1610 return res;
1611 }
1612 EXPORT_SYMBOL(mlx5_lag_is_master);
1613
1614 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
1615 {
1616 struct mlx5_lag *ldev;
1617 unsigned long flags;
1618 bool res;
1619
1620 spin_lock_irqsave(&lag_lock, flags);
1621 ldev = mlx5_lag_dev(dev);
1622 res = ldev && __mlx5_lag_is_sriov(ldev);
1623 spin_unlock_irqrestore(&lag_lock, flags);
1624
1625 return res;
1626 }
1627 EXPORT_SYMBOL(mlx5_lag_is_sriov);
1628
1629 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev)
1630 {
1631 struct mlx5_lag *ldev;
1632 unsigned long flags;
1633 bool res;
1634
1635 spin_lock_irqsave(&lag_lock, flags);
1636 ldev = mlx5_lag_dev(dev);
1637 res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
1638 spin_unlock_irqrestore(&lag_lock, flags);
1639
1640 return res;
1641 }
1642 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb);
1643
1644 void mlx5_lag_disable_change(struct mlx5_core_dev *dev)
1645 {
1646 struct mlx5_lag *ldev;
1647
1648 ldev = mlx5_lag_dev(dev);
1649 if (!ldev)
1650 return;
1651
1652 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp);
1653 mutex_lock(&ldev->lock);
1654
1655 ldev->mode_changes_in_progress++;
1656 if (__mlx5_lag_is_active(ldev))
1657 mlx5_disable_lag(ldev);
1658
1659 mutex_unlock(&ldev->lock);
1660 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp);
1661 }
1662
1663 void mlx5_lag_enable_change(struct mlx5_core_dev *dev)
1664 {
1665 struct mlx5_lag *ldev;
1666
1667 ldev = mlx5_lag_dev(dev);
1668 if (!ldev)
1669 return;
1670
1671 mutex_lock(&ldev->lock);
1672 ldev->mode_changes_in_progress--;
1673 mutex_unlock(&ldev->lock);
1674 mlx5_queue_bond_work(ldev, 0);
1675 }
1676
1677 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
1678 struct net_device *slave)
1679 {
1680 struct mlx5_lag *ldev;
1681 unsigned long flags;
1682 u8 port = 0;
1683 int i;
1684
1685 spin_lock_irqsave(&lag_lock, flags);
1686 ldev = mlx5_lag_dev(dev);
1687 if (!(ldev && __mlx5_lag_is_roce(ldev)))
1688 goto unlock;
1689
1690 mlx5_ldev_for_each(i, 0, ldev) {
1691 if (ldev->pf[i].netdev == slave) {
1692 port = i;
1693 break;
1694 }
1695 }
1696
1697 port = ldev->v2p_map[port * ldev->buckets];
1698
1699 unlock:
1700 spin_unlock_irqrestore(&lag_lock, flags);
1701 return port;
1702 }
1703 EXPORT_SYMBOL(mlx5_lag_get_slave_port);
1704
1705 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev)
1706 {
1707 struct mlx5_lag *ldev;
1708
1709 ldev = mlx5_lag_dev(dev);
1710 if (!ldev)
1711 return 0;
1712
1713 return ldev->ports;
1714 }
1715 EXPORT_SYMBOL(mlx5_lag_get_num_ports);
1716
1717 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i)
1718 {
1719 struct mlx5_core_dev *peer_dev = NULL;
1720 struct mlx5_lag *ldev;
1721 unsigned long flags;
1722 int idx;
1723
1724 spin_lock_irqsave(&lag_lock, flags);
1725 ldev = mlx5_lag_dev(dev);
1726 if (!ldev)
1727 goto unlock;
1728
1729 if (*i == MLX5_MAX_PORTS)
1730 goto unlock;
1731 mlx5_ldev_for_each(idx, *i, ldev)
1732 if (ldev->pf[idx].dev != dev)
1733 break;
1734
1735 if (idx == MLX5_MAX_PORTS) {
1736 *i = idx;
1737 goto unlock;
1738 }
1739 *i = idx + 1;
1740
1741 peer_dev = ldev->pf[idx].dev;
1742
1743 unlock:
1744 spin_unlock_irqrestore(&lag_lock, flags);
1745 return peer_dev;
1746 }
1747 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev);
1748
1749 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
1750 u64 *values,
1751 int num_counters,
1752 size_t *offsets)
1753 {
1754 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out);
1755 struct mlx5_core_dev **mdev;
1756 int ret = 0, i, j, idx = 0;
1757 struct mlx5_lag *ldev;
1758 unsigned long flags;
1759 int num_ports;
1760 void *out;
1761
1762 out = kvzalloc(outlen, GFP_KERNEL);
1763 if (!out)
1764 return -ENOMEM;
1765
1766 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL);
1767 if (!mdev) {
1768 ret = -ENOMEM;
1769 goto free_out;
1770 }
1771
1772 memset(values, 0, sizeof(*values) * num_counters);
1773
1774 spin_lock_irqsave(&lag_lock, flags);
1775 ldev = mlx5_lag_dev(dev);
1776 if (ldev && __mlx5_lag_is_active(ldev)) {
1777 num_ports = ldev->ports;
1778 mlx5_ldev_for_each(i, 0, ldev)
1779 mdev[idx++] = ldev->pf[i].dev;
1780 } else {
1781 num_ports = 1;
1782 mdev[MLX5_LAG_P1] = dev;
1783 }
1784 spin_unlock_irqrestore(&lag_lock, flags);
1785
1786 for (i = 0; i < num_ports; ++i) {
1787 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
1788
1789 MLX5_SET(query_cong_statistics_in, in, opcode,
1790 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
1791 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
1792 out);
1793 if (ret)
1794 goto free_mdev;
1795
1796 for (j = 0; j < num_counters; ++j)
1797 values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
1798 }
1799
1800 free_mdev:
1801 kvfree(mdev);
1802 free_out:
1803 kvfree(out);
1804 return ret;
1805 }
1806 EXPORT_SYMBOL(mlx5_lag_query_cong_counters);
1807