1 /* 2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/netdevice.h> 34 #include <net/bonding.h> 35 #include <linux/mlx5/driver.h> 36 #include <linux/mlx5/eswitch.h> 37 #include <linux/mlx5/vport.h> 38 #include <linux/mlx5/lag.h> 39 #include "lib/mlx5.h" 40 #include "lib/devcom.h" 41 #include "mlx5_core.h" 42 #include "eswitch.h" 43 #include "esw/acl/ofld.h" 44 #include "lag.h" 45 #include "mp.h" 46 #include "mpesw.h" 47 48 49 /* General purpose, use for short periods of time. 50 * Beware of lock dependencies (preferably, no locks should be acquired 51 * under it). 52 */ 53 static DEFINE_SPINLOCK(lag_lock); 54 55 static int get_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) 56 { 57 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) 58 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT; 59 60 if (mode == MLX5_LAG_MODE_MPESW) 61 return MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW; 62 63 return MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY; 64 } 65 66 static u8 lag_active_port_bits(struct mlx5_lag *ldev) 67 { 68 u8 enabled_ports[MLX5_MAX_PORTS] = {}; 69 u8 active_port = 0; 70 int num_enabled; 71 int idx; 72 73 mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports, 74 &num_enabled); 75 for (idx = 0; idx < num_enabled; idx++) 76 active_port |= BIT_MASK(enabled_ports[idx]); 77 78 return active_port; 79 } 80 81 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev, 82 int mode, unsigned long flags) 83 { 84 bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, 85 &flags); 86 int port_sel_mode = get_port_sel_mode(mode, flags); 87 u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; 88 u8 *ports = ldev->v2p_map; 89 int idx0, idx1; 90 void *lag_ctx; 91 92 lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); 93 MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); 94 MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); 95 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0); 96 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1); 97 98 if (idx0 < 0 || idx1 < 0) 99 return -EINVAL; 100 101 switch (port_sel_mode) { 102 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: 103 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]); 104 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]); 105 break; 106 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: 107 if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass)) 108 break; 109 110 MLX5_SET(lagc, lag_ctx, active_port, 111 lag_active_port_bits(mlx5_lag_dev(dev))); 112 break; 113 default: 114 break; 115 } 116 MLX5_SET(lagc, lag_ctx, port_select_mode, port_sel_mode); 117 118 return mlx5_cmd_exec_in(dev, create_lag, in); 119 } 120 121 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev, 122 u8 *ports) 123 { 124 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; 125 void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); 126 int idx0, idx1; 127 128 idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0); 129 idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1); 130 if (idx0 < 0 || idx1 < 0) 131 return -EINVAL; 132 133 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); 134 MLX5_SET(modify_lag_in, in, field_select, 0x1); 135 136 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]); 137 MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]); 138 139 return mlx5_cmd_exec_in(dev, modify_lag, in); 140 } 141 142 static u32 mlx5_lag_dev_group_id(struct mlx5_core_dev *dev) 143 { 144 struct mlx5_lag *ldev = mlx5_lag_dev(dev); 145 struct lag_func *pf; 146 int i; 147 148 if (!ldev) 149 return 0; 150 151 mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) { 152 pf = mlx5_lag_pf(ldev, i); 153 if (pf->dev == dev) 154 return pf->sd_fdb_active ? pf->group_id : 0; 155 } 156 return 0; 157 } 158 159 static int mlx5_lag_is_sw_lag(struct mlx5_core_dev *dev) 160 { 161 return mlx5_lag_is_sd(dev); 162 } 163 164 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) 165 { 166 u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {}; 167 struct mlx5_lag *ldev = mlx5_lag_dev(dev); 168 int ret; 169 170 if (mlx5_lag_is_sw_lag(dev)) { 171 if (!ldev) 172 return -ENODEV; 173 174 mutex_lock(&ldev->lock); 175 ret = mlx5_lag_create_vport_lag(mlx5_lag_dev(dev), 176 mlx5_lag_dev_group_id(dev)); 177 mutex_unlock(&ldev->lock); 178 return ret; 179 } 180 181 MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); 182 183 return mlx5_cmd_exec_in(dev, create_vport_lag, in); 184 } 185 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); 186 187 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) 188 { 189 u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {}; 190 struct mlx5_lag *ldev = mlx5_lag_dev(dev); 191 192 if (mlx5_lag_is_sw_lag(dev)) { 193 if (!ldev) 194 return 0; 195 196 mutex_lock(&ldev->lock); 197 mlx5_lag_destroy_vport_lag(mlx5_lag_dev(dev), 198 mlx5_lag_dev_group_id(dev)); 199 mutex_unlock(&ldev->lock); 200 return 0; 201 } 202 203 MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); 204 205 return mlx5_cmd_exec_in(dev, destroy_vport_lag, in); 206 } 207 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); 208 209 static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, 210 u8 *ports, int *num_disabled) 211 { 212 int i; 213 214 *num_disabled = 0; 215 mlx5_ldev_for_each(i, 0, ldev) 216 if (!tracker->netdev_state[i].tx_enabled || 217 !tracker->netdev_state[i].link_up) 218 ports[(*num_disabled)++] = i; 219 } 220 221 void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, 222 u8 *ports, int *num_enabled) 223 { 224 int i; 225 226 *num_enabled = 0; 227 mlx5_ldev_for_each(i, 0, ldev) 228 if (tracker->netdev_state[i].tx_enabled && 229 tracker->netdev_state[i].link_up) 230 ports[(*num_enabled)++] = i; 231 232 if (*num_enabled == 0) 233 mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled); 234 } 235 236 static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, 237 struct mlx5_lag *ldev, 238 struct lag_tracker *tracker, 239 unsigned long flags) 240 { 241 char buf[MLX5_MAX_PORTS * 10 + 1] = {}; 242 u8 enabled_ports[MLX5_MAX_PORTS] = {}; 243 int written = 0; 244 int num_enabled; 245 int idx; 246 int err; 247 int i; 248 int j; 249 250 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { 251 mlx5_infer_tx_enabled(tracker, ldev, enabled_ports, 252 &num_enabled); 253 for (i = 0; i < num_enabled; i++) { 254 err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); 255 if (err != 3) 256 return; 257 written += err; 258 } 259 buf[written - 2] = 0; 260 mlx5_core_info(dev, "lag map active ports: %s\n", buf); 261 } else { 262 mlx5_ldev_for_each(i, 0, ldev) { 263 for (j = 0; j < ldev->buckets; j++) { 264 idx = i * ldev->buckets + j; 265 err = scnprintf(buf + written, 10, 266 " port %d:%d", i + 1, ldev->v2p_map[idx]); 267 if (err != 9) 268 return; 269 written += err; 270 } 271 } 272 mlx5_core_info(dev, "lag map:%s\n", buf); 273 } 274 } 275 276 static int mlx5_lag_netdev_event(struct notifier_block *this, 277 unsigned long event, void *ptr); 278 static void mlx5_do_bond_work(struct work_struct *work); 279 280 static void mlx5_ldev_free(struct kref *ref) 281 { 282 struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); 283 struct lag_func *pf; 284 struct net *net; 285 int i; 286 287 if (ldev->nb.notifier_call) { 288 net = read_pnet(&ldev->net); 289 unregister_netdevice_notifier_net(net, &ldev->nb); 290 } 291 292 mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) { 293 pf = mlx5_lag_pf(ldev, i); 294 if (pf->port_change_nb.nb.notifier_call) { 295 struct mlx5_nb *nb = &pf->port_change_nb; 296 297 mlx5_eq_notifier_unregister(pf->dev, nb); 298 } 299 xa_erase(&ldev->pfs, i); 300 kfree(pf); 301 } 302 xa_destroy(&ldev->pfs); 303 304 mlx5_lag_mp_cleanup(ldev); 305 cancel_delayed_work_sync(&ldev->bond_work); 306 cancel_work_sync(&ldev->speed_update_work); 307 destroy_workqueue(ldev->wq); 308 mutex_destroy(&ldev->lock); 309 kfree(ldev); 310 } 311 312 static void mlx5_ldev_put(struct mlx5_lag *ldev) 313 { 314 kref_put(&ldev->ref, mlx5_ldev_free); 315 } 316 317 static void mlx5_ldev_get(struct mlx5_lag *ldev) 318 { 319 kref_get(&ldev->ref); 320 } 321 322 static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) 323 { 324 struct mlx5_lag *ldev; 325 int err; 326 327 ldev = kzalloc_obj(*ldev); 328 if (!ldev) 329 return NULL; 330 331 ldev->wq = create_singlethread_workqueue("mlx5_lag"); 332 if (!ldev->wq) { 333 kfree(ldev); 334 return NULL; 335 } 336 337 kref_init(&ldev->ref); 338 mutex_init(&ldev->lock); 339 xa_init_flags(&ldev->pfs, XA_FLAGS_ALLOC); 340 INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); 341 INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work); 342 343 if (!mlx5_sd_is_supported(dev)) { 344 ldev->nb.notifier_call = mlx5_lag_netdev_event; 345 write_pnet(&ldev->net, mlx5_core_net(dev)); 346 if (register_netdevice_notifier_net(read_pnet(&ldev->net), 347 &ldev->nb)) { 348 ldev->nb.notifier_call = NULL; 349 mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); 350 } 351 } 352 ldev->mode = MLX5_LAG_MODE_NONE; 353 354 err = mlx5_lag_mp_init(ldev); 355 if (err) 356 mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", 357 err); 358 359 ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); 360 ldev->buckets = 1; 361 362 return ldev; 363 } 364 365 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, 366 struct net_device *ndev) 367 { 368 struct lag_func *pf; 369 int i; 370 371 mlx5_ldev_for_each(i, 0, ldev) { 372 pf = mlx5_lag_pf(ldev, i); 373 if (pf->netdev == ndev) 374 return i; 375 } 376 377 return -ENOENT; 378 } 379 380 static int mlx5_lag_get_master_idx(struct mlx5_lag *ldev) 381 { 382 unsigned long idx = 0; 383 void *entry; 384 385 if (!ldev) 386 return -ENOENT; 387 388 entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER); 389 if (!entry) 390 return -ENOENT; 391 392 return (int)idx; 393 } 394 395 int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) 396 { 397 int master_idx, i, num = 0; 398 399 if (!ldev) 400 return -ENOENT; 401 402 master_idx = mlx5_lag_get_master_idx(ldev); 403 404 /* If seq 0 is requested and there's a primary PF, return it */ 405 if (master_idx >= 0) { 406 if (seq == 0) 407 return master_idx; 408 num++; 409 } 410 411 mlx5_ldev_for_each(i, 0, ldev) { 412 /* Skip the primary PF in the loop */ 413 if (i == master_idx) 414 continue; 415 416 if (num == seq) 417 return i; 418 num++; 419 } 420 return -ENOENT; 421 } 422 423 /* Return the appropriate iterator filter for a device in LAG: 424 * - SD shared FDB active: iterate only the device's SD group 425 * - SD group exists but shared FDB not active: iterate all devices 426 * - No SD: iterate ports only 427 */ 428 static u32 mlx5_lag_get_filter(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) 429 { 430 struct lag_func *pf = mlx5_lag_pf_by_dev(ldev, dev); 431 432 if (pf && pf->sd_fdb_active) 433 return pf->group_id; 434 if (pf && pf->group_id) 435 return MLX5_LAG_FILTER_ALL; 436 return MLX5_LAG_FILTER_PORTS; 437 } 438 439 /* Reverse of mlx5_lag_get_dev_index_by_seq: given a device, return its 440 * sequence number in the LAG. Master is always 0, others numbered 441 * sequentially starting from 1. 442 */ 443 int mlx5_lag_get_dev_seq(struct mlx5_core_dev *dev) 444 { 445 struct mlx5_lag *ldev = mlx5_lag_dev(dev); 446 int master_idx, i, num = 1; 447 struct lag_func *pf; 448 u32 filter; 449 450 if (!ldev) 451 return -ENOENT; 452 453 filter = mlx5_lag_get_filter(ldev, dev); 454 master_idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, 0, filter); 455 if (master_idx < 0) 456 return -ENOENT; 457 458 pf = mlx5_lag_pf(ldev, master_idx); 459 if (pf && pf->dev == dev) 460 return 0; 461 462 mlx5_lag_for_each(i, 0, ldev, filter) { 463 if (i == master_idx) 464 continue; 465 pf = mlx5_lag_pf(ldev, i); 466 if (pf->dev == dev) 467 return num; 468 num++; 469 } 470 return -ENOENT; 471 } 472 EXPORT_SYMBOL(mlx5_lag_get_dev_seq); 473 474 /* seq 0 = master, then all remaining devices */ 475 static int mlx5_lag_get_dev_index_by_seq_all(struct mlx5_lag *ldev, int seq) 476 { 477 int master_idx, i, num = 0; 478 479 master_idx = mlx5_lag_get_master_idx(ldev); 480 481 if (master_idx >= 0) { 482 if (seq == 0) 483 return master_idx; 484 num++; 485 } 486 487 mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) { 488 if (i == master_idx) 489 continue; 490 if (num == seq) 491 return i; 492 num++; 493 } 494 return -ENOENT; 495 } 496 497 /* From group POV, port-marked entry is the lag master */ 498 static int mlx5_lag_get_dev_index_by_seq_group(struct mlx5_lag *ldev, int seq, 499 u32 group_id) 500 { 501 int i, num = 0; 502 503 mlx5_lag_for_each(i, 0, ldev, group_id) { 504 if (xa_get_mark(&ldev->pfs, i, MLX5_LAG_XA_MARK_PORT)) { 505 if (seq == 0) 506 return i; 507 num++; 508 break; 509 } 510 } 511 512 mlx5_lag_for_each(i, 0, ldev, group_id) { 513 if (xa_get_mark(&ldev->pfs, i, MLX5_LAG_XA_MARK_PORT)) 514 continue; 515 if (num == seq) 516 return i; 517 num++; 518 } 519 return -ENOENT; 520 } 521 522 int mlx5_lag_get_dev_index_by_seq_filter(struct mlx5_lag *ldev, int seq, 523 u32 filter) 524 { 525 if (!ldev) 526 return -ENOENT; 527 528 if (!filter || filter == MLX5_LAG_FILTER_PORTS) 529 return mlx5_lag_get_dev_index_by_seq(ldev, seq); 530 531 if (filter == MLX5_LAG_FILTER_ALL) 532 return mlx5_lag_get_dev_index_by_seq_all(ldev, seq); 533 534 return mlx5_lag_get_dev_index_by_seq_group(ldev, seq, filter); 535 } 536 537 /* Devcom events for LAG master marking */ 538 #define LAG_DEVCOM_PAIR (0) 539 #define LAG_DEVCOM_UNPAIR (1) 540 541 static void mlx5_lag_mark_master(struct mlx5_lag *ldev) 542 { 543 int lowest_dev_idx = INT_MAX; 544 struct lag_func *pf; 545 int master_xa_idx = -1; 546 int dev_idx; 547 int i; 548 549 mlx5_ldev_for_each(i, 0, ldev) { 550 pf = mlx5_lag_pf(ldev, i); 551 dev_idx = mlx5_get_dev_index(pf->dev); 552 if (dev_idx < lowest_dev_idx) { 553 lowest_dev_idx = dev_idx; 554 master_xa_idx = i; 555 } 556 } 557 558 if (master_xa_idx >= 0) 559 xa_set_mark(&ldev->pfs, master_xa_idx, MLX5_LAG_XA_MARK_MASTER); 560 } 561 562 static void mlx5_lag_clear_master(struct mlx5_lag *ldev) 563 { 564 unsigned long idx = 0; 565 void *entry; 566 567 entry = xa_find(&ldev->pfs, &idx, U8_MAX, MLX5_LAG_XA_MARK_MASTER); 568 if (!entry) 569 return; 570 571 xa_clear_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_MASTER); 572 } 573 574 /* Devcom event handler to manage LAG master marking */ 575 static int mlx5_lag_devcom_event(int event, void *my_data, void *event_data) 576 { 577 struct mlx5_core_dev *dev = my_data; 578 struct mlx5_lag *ldev; 579 int idx; 580 581 ldev = mlx5_lag_dev(dev); 582 if (!ldev) 583 return 0; 584 585 mutex_lock(&ldev->lock); 586 switch (event) { 587 case LAG_DEVCOM_PAIR: 588 /* No need to mark more than once */ 589 idx = mlx5_lag_get_master_idx(ldev); 590 if (idx >= 0) 591 break; 592 /* Check if all LAG ports are now registered */ 593 if (mlx5_lag_num_devs(ldev) == ldev->ports) 594 mlx5_lag_mark_master(ldev); 595 break; 596 597 case LAG_DEVCOM_UNPAIR: 598 /* Clear master mark when a device is removed */ 599 mlx5_lag_clear_master(ldev); 600 break; 601 } 602 mutex_unlock(&ldev->lock); 603 return 0; 604 } 605 606 int mlx5_lag_num_devs(struct mlx5_lag *ldev) 607 { 608 int i, num = 0; 609 610 if (!ldev) 611 return 0; 612 613 mlx5_ldev_for_each(i, 0, ldev) { 614 (void)i; 615 num++; 616 } 617 return num; 618 } 619 620 int mlx5_lag_num_netdevs(struct mlx5_lag *ldev) 621 { 622 struct lag_func *pf; 623 int i, num = 0; 624 625 if (!ldev) 626 return 0; 627 628 mlx5_ldev_for_each(i, 0, ldev) { 629 pf = mlx5_lag_pf(ldev, i); 630 if (pf->netdev) 631 num++; 632 } 633 return num; 634 } 635 636 static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) 637 { 638 return ldev->mode == MLX5_LAG_MODE_ROCE; 639 } 640 641 static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) 642 { 643 return ldev->mode == MLX5_LAG_MODE_SRIOV; 644 } 645 646 static bool __mlx5_lag_is_sd_active(struct mlx5_lag *ldev, 647 struct mlx5_core_dev *dev) 648 { 649 struct lag_func *pf = mlx5_lag_pf_by_dev(ldev, dev); 650 651 return pf && pf->sd_fdb_active; 652 } 653 654 /* Create a mapping between steering slots and active ports. 655 * As we have ldev->buckets slots per port first assume the native 656 * mapping should be used. 657 * If there are ports that are disabled fill the relevant slots 658 * with mapping that points to active ports. 659 */ 660 static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, 661 struct mlx5_lag *ldev, 662 u8 buckets, 663 u8 *ports) 664 { 665 int disabled[MLX5_MAX_PORTS] = {}; 666 int enabled[MLX5_MAX_PORTS] = {}; 667 int disabled_ports_num = 0; 668 int enabled_ports_num = 0; 669 int idx; 670 u32 rand; 671 int i; 672 int j; 673 674 mlx5_ldev_for_each(i, 0, ldev) { 675 if (tracker->netdev_state[i].tx_enabled && 676 tracker->netdev_state[i].link_up) 677 enabled[enabled_ports_num++] = i; 678 else 679 disabled[disabled_ports_num++] = i; 680 } 681 682 /* Use native mapping by default where each port's buckets 683 * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 3 etc 684 * ports[] values are 1-indexed device indices for FW. 685 */ 686 mlx5_ldev_for_each(i, 0, ldev) { 687 for (j = 0; j < buckets; j++) { 688 idx = i * buckets + j; 689 ports[idx] = mlx5_lag_xa_to_dev_idx(ldev, i) + 1; 690 } 691 } 692 693 /* If all ports are disabled/enabled keep native mapping */ 694 if (enabled_ports_num == ldev->ports || 695 disabled_ports_num == ldev->ports) 696 return; 697 698 /* Go over the disabled ports and for each assign a random active port */ 699 for (i = 0; i < disabled_ports_num; i++) { 700 for (j = 0; j < buckets; j++) { 701 int rand_xa_idx; 702 703 get_random_bytes(&rand, 4); 704 rand_xa_idx = enabled[rand % enabled_ports_num]; 705 ports[disabled[i] * buckets + j] = 706 mlx5_lag_xa_to_dev_idx(ldev, rand_xa_idx) + 1; 707 } 708 } 709 } 710 711 static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) 712 { 713 struct lag_func *pf; 714 int i; 715 716 mlx5_ldev_for_each(i, 0, ldev) { 717 pf = mlx5_lag_pf(ldev, i); 718 if (pf->has_drop) 719 return true; 720 } 721 return false; 722 } 723 724 static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) 725 { 726 struct lag_func *pf; 727 int i; 728 729 mlx5_ldev_for_each(i, 0, ldev) { 730 pf = mlx5_lag_pf(ldev, i); 731 if (!pf->has_drop) 732 continue; 733 734 mlx5_esw_acl_ingress_vport_drop_rule_destroy(pf->dev->priv.eswitch, 735 MLX5_VPORT_UPLINK); 736 pf->has_drop = false; 737 } 738 } 739 740 static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, 741 struct lag_tracker *tracker) 742 { 743 u8 disabled_ports[MLX5_MAX_PORTS] = {}; 744 struct mlx5_core_dev *dev; 745 struct lag_func *pf; 746 int disabled_index; 747 int num_disabled; 748 int err; 749 int i; 750 751 /* First delete the current drop rule so there won't be any dropped 752 * packets 753 */ 754 mlx5_lag_drop_rule_cleanup(ldev); 755 756 if (!ldev->tracker.has_inactive) 757 return; 758 759 mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled); 760 761 for (i = 0; i < num_disabled; i++) { 762 disabled_index = disabled_ports[i]; 763 pf = mlx5_lag_pf(ldev, disabled_index); 764 dev = pf->dev; 765 err = mlx5_esw_acl_ingress_vport_drop_rule_create(dev->priv.eswitch, 766 MLX5_VPORT_UPLINK); 767 if (!err) 768 pf->has_drop = true; 769 else 770 mlx5_core_err(dev, 771 "Failed to create lag drop rule, error: %d", err); 772 } 773 } 774 775 static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports) 776 { 777 u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; 778 void *lag_ctx; 779 780 lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); 781 782 MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); 783 MLX5_SET(modify_lag_in, in, field_select, 0x2); 784 785 MLX5_SET(lagc, lag_ctx, active_port, ports); 786 787 return mlx5_cmd_exec_in(dev, modify_lag, in); 788 } 789 790 static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) 791 { 792 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 793 struct mlx5_core_dev *dev0; 794 u8 active_ports; 795 int ret; 796 797 if (idx < 0) 798 return -EINVAL; 799 800 dev0 = mlx5_lag_pf(ldev, idx)->dev; 801 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { 802 ret = mlx5_lag_port_sel_modify(ldev, ports); 803 if (ret || 804 !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table_bypass)) 805 return ret; 806 807 active_ports = lag_active_port_bits(ldev); 808 809 return mlx5_cmd_modify_active_port(dev0, active_ports); 810 } 811 return mlx5_cmd_modify_lag(dev0, ldev, ports); 812 } 813 814 static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev) 815 { 816 struct net_device *ndev = NULL; 817 struct lag_func *pf; 818 struct mlx5_lag *ldev; 819 unsigned long flags; 820 int i, last_idx; 821 822 spin_lock_irqsave(&lag_lock, flags); 823 ldev = mlx5_lag_dev(dev); 824 825 if (!ldev) 826 goto unlock; 827 828 mlx5_ldev_for_each(i, 0, ldev) { 829 pf = mlx5_lag_pf(ldev, i); 830 if (ldev->tracker.netdev_state[i].tx_enabled) 831 ndev = pf->netdev; 832 } 833 if (!ndev) { 834 last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1); 835 if (last_idx < 0) 836 goto unlock; 837 pf = mlx5_lag_pf(ldev, last_idx); 838 ndev = pf->netdev; 839 } 840 841 dev_hold(ndev); 842 843 unlock: 844 spin_unlock_irqrestore(&lag_lock, flags); 845 846 return ndev; 847 } 848 849 void mlx5_modify_lag(struct mlx5_lag *ldev, 850 struct lag_tracker *tracker) 851 { 852 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 853 u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; 854 struct mlx5_core_dev *dev0; 855 int idx; 856 int err; 857 int i; 858 int j; 859 860 if (first_idx < 0) 861 return; 862 863 dev0 = mlx5_lag_pf(ldev, first_idx)->dev; 864 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports); 865 866 mlx5_ldev_for_each(i, 0, ldev) { 867 for (j = 0; j < ldev->buckets; j++) { 868 idx = i * ldev->buckets + j; 869 if (ports[idx] == ldev->v2p_map[idx]) 870 continue; 871 err = _mlx5_modify_lag(ldev, ports); 872 if (err) { 873 mlx5_core_err(dev0, 874 "Failed to modify LAG (%d)\n", 875 err); 876 return; 877 } 878 memcpy(ldev->v2p_map, ports, sizeof(ports)); 879 880 mlx5_lag_print_mapping(dev0, ldev, tracker, 881 ldev->mode_flags); 882 break; 883 } 884 } 885 886 if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 887 struct net_device *ndev = mlx5_lag_active_backup_get_netdev(dev0); 888 889 if(!(ldev->mode == MLX5_LAG_MODE_ROCE)) 890 mlx5_lag_drop_rule_setup(ldev, tracker); 891 /** Only sriov and roce lag should have tracker->tx_type set so 892 * no need to check the mode 893 */ 894 blocking_notifier_call_chain(&dev0->priv.lag_nh, 895 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, 896 ndev); 897 dev_put(ndev); 898 } 899 } 900 901 static int mlx5_lag_set_port_sel_mode(struct mlx5_lag *ldev, 902 enum mlx5_lag_mode mode, 903 unsigned long *flags) 904 { 905 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 906 struct mlx5_core_dev *dev0; 907 908 if (first_idx < 0) 909 return -EINVAL; 910 911 if (mode == MLX5_LAG_MODE_MPESW || 912 mode == MLX5_LAG_MODE_MULTIPATH) 913 return 0; 914 915 dev0 = mlx5_lag_pf(ldev, first_idx)->dev; 916 917 if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { 918 if (ldev->ports > 2) 919 return -EINVAL; 920 return 0; 921 } 922 923 if (ldev->ports > 2) 924 ldev->buckets = MLX5_LAG_MAX_HASH_BUCKETS; 925 926 set_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, flags); 927 928 return 0; 929 } 930 931 static int mlx5_lag_set_flags(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, 932 struct lag_tracker *tracker, bool shared_fdb, 933 unsigned long *flags) 934 { 935 *flags = 0; 936 if (shared_fdb) { 937 set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags); 938 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); 939 } 940 941 if (mode == MLX5_LAG_MODE_MPESW) 942 set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags); 943 944 return mlx5_lag_set_port_sel_mode(ldev, mode, flags); 945 } 946 947 char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) 948 { 949 int port_sel_mode = get_port_sel_mode(mode, flags); 950 951 switch (port_sel_mode) { 952 case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity"; 953 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: return "hash"; 954 case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_MPESW: return "mpesw"; 955 default: return "invalid"; 956 } 957 } 958 959 static int mlx5_create_lag(struct mlx5_lag *ldev, 960 struct lag_tracker *tracker, 961 enum mlx5_lag_mode mode, 962 unsigned long flags) 963 { 964 int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 965 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); 966 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; 967 struct mlx5_core_dev *dev0; 968 int err; 969 970 if (first_idx < 0) 971 return -EINVAL; 972 973 dev0 = mlx5_lag_pf(ldev, first_idx)->dev; 974 if (tracker) 975 mlx5_lag_print_mapping(dev0, ldev, tracker, flags); 976 mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", 977 shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); 978 979 err = mlx5_cmd_create_lag(dev0, ldev, mode, flags); 980 if (err) { 981 mlx5_core_err(dev0, 982 "Failed to create LAG (%d)\n", 983 err); 984 return err; 985 } 986 987 if (shared_fdb) { 988 err = mlx5_lag_create_single_fdb(ldev); 989 if (err) 990 mlx5_core_err(dev0, "Can't enable single FDB mode\n"); 991 else 992 mlx5_core_info(dev0, "Operation mode is single FDB\n"); 993 } 994 995 if (err) { 996 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); 997 if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) 998 mlx5_core_err(dev0, 999 "Failed to deactivate RoCE LAG; driver restart required\n"); 1000 } 1001 BLOCKING_INIT_NOTIFIER_HEAD(&dev0->priv.lag_nh); 1002 1003 return err; 1004 } 1005 1006 int mlx5_activate_lag(struct mlx5_lag *ldev, 1007 struct lag_tracker *tracker, 1008 enum mlx5_lag_mode mode, 1009 bool shared_fdb) 1010 { 1011 bool roce_lag = mode == MLX5_LAG_MODE_ROCE; 1012 struct mlx5_core_dev *dev0; 1013 unsigned long flags = 0; 1014 int master_idx; 1015 int err; 1016 1017 master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 1018 if (master_idx < 0) 1019 return -EINVAL; 1020 1021 dev0 = mlx5_lag_pf(ldev, master_idx)->dev; 1022 err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); 1023 if (err) 1024 return err; 1025 1026 if (mode != MLX5_LAG_MODE_MPESW) { 1027 mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map); 1028 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { 1029 err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, 1030 ldev->v2p_map); 1031 if (err) { 1032 mlx5_core_err(dev0, 1033 "Failed to create LAG port selection(%d)\n", 1034 err); 1035 return err; 1036 } 1037 } 1038 } 1039 1040 err = mlx5_create_lag(ldev, tracker, mode, flags); 1041 if (err) { 1042 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) 1043 mlx5_lag_port_sel_destroy(ldev); 1044 if (roce_lag) 1045 mlx5_core_err(dev0, 1046 "Failed to activate RoCE LAG\n"); 1047 else 1048 mlx5_core_err(dev0, 1049 "Failed to activate VF LAG\n" 1050 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); 1051 return err; 1052 } 1053 1054 if (tracker && tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP && 1055 !roce_lag) 1056 mlx5_lag_drop_rule_setup(ldev, tracker); 1057 1058 ldev->mode = mode; 1059 ldev->mode_flags = flags; 1060 return 0; 1061 } 1062 1063 int mlx5_deactivate_lag(struct mlx5_lag *ldev) 1064 { 1065 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 1066 u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; 1067 bool roce_lag = __mlx5_lag_is_roce(ldev); 1068 unsigned long flags = ldev->mode_flags; 1069 struct mlx5_core_dev *dev0; 1070 int err; 1071 1072 if (master_idx < 0) 1073 return -EINVAL; 1074 1075 dev0 = mlx5_lag_pf(ldev, master_idx)->dev; 1076 ldev->mode = MLX5_LAG_MODE_NONE; 1077 ldev->mode_flags = 0; 1078 mlx5_lag_mp_reset(ldev); 1079 1080 if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { 1081 mlx5_lag_destroy_single_fdb(ldev); 1082 clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); 1083 } 1084 1085 MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); 1086 err = mlx5_cmd_exec_in(dev0, destroy_lag, in); 1087 if (err) { 1088 if (roce_lag) { 1089 mlx5_core_err(dev0, 1090 "Failed to deactivate RoCE LAG; driver restart required\n"); 1091 } else { 1092 mlx5_core_err(dev0, 1093 "Failed to deactivate VF LAG; driver restart required\n" 1094 "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); 1095 } 1096 return err; 1097 } 1098 1099 if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { 1100 mlx5_lag_port_sel_destroy(ldev); 1101 ldev->buckets = 1; 1102 } 1103 if (mlx5_lag_has_drop_rule(ldev)) 1104 mlx5_lag_drop_rule_cleanup(ldev); 1105 1106 return 0; 1107 } 1108 1109 bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) 1110 { 1111 int master_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 1112 #ifdef CONFIG_MLX5_ESWITCH 1113 struct mlx5_core_dev *dev; 1114 u8 mode; 1115 #endif 1116 struct lag_func *pf; 1117 bool roce_support; 1118 int i; 1119 1120 if (master_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports) 1121 return false; 1122 1123 #ifdef CONFIG_MLX5_ESWITCH 1124 mlx5_ldev_for_each(i, 0, ldev) { 1125 pf = mlx5_lag_pf(ldev, i); 1126 dev = pf->dev; 1127 if (mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) 1128 return false; 1129 } 1130 1131 pf = mlx5_lag_pf(ldev, master_idx); 1132 dev = pf->dev; 1133 mode = mlx5_eswitch_mode(dev); 1134 mlx5_ldev_for_each(i, 0, ldev) { 1135 pf = mlx5_lag_pf(ldev, i); 1136 if (mlx5_eswitch_mode(pf->dev) != mode) 1137 return false; 1138 } 1139 1140 #else 1141 mlx5_ldev_for_each(i, 0, ldev) { 1142 pf = mlx5_lag_pf(ldev, i); 1143 if (mlx5_sriov_is_enabled(pf->dev)) 1144 return false; 1145 } 1146 #endif 1147 pf = mlx5_lag_pf(ldev, master_idx); 1148 roce_support = mlx5_get_roce_state(pf->dev); 1149 mlx5_ldev_for_each(i, 0, ldev) { 1150 if (i == master_idx) 1151 continue; 1152 pf = mlx5_lag_pf(ldev, i); 1153 if (mlx5_get_roce_state(pf->dev) != roce_support) 1154 return false; 1155 } 1156 1157 return true; 1158 } 1159 1160 static void mlx5_lag_assert_locked_transition(struct mlx5_lag *ldev, u32 filter) 1161 { 1162 struct mlx5_devcom_comp_dev *devcom = NULL; 1163 struct lag_func *pf; 1164 int i; 1165 1166 lockdep_assert_held(&ldev->lock); 1167 1168 i = mlx5_get_next_lag_func(ldev, 0, filter); 1169 if (i < MLX5_MAX_PORTS) { 1170 pf = mlx5_lag_pf(ldev, i); 1171 if (filter == MLX5_LAG_FILTER_PORTS || 1172 filter == MLX5_LAG_FILTER_ALL) 1173 devcom = pf->dev->priv.hca_devcom_comp; 1174 else 1175 devcom = mlx5_sd_get_devcom(pf->dev); 1176 } 1177 mlx5_devcom_comp_assert_locked(devcom); 1178 } 1179 1180 static void mlx5_lag_drop_lock_for_reps(struct mlx5_lag *ldev, u32 filter) 1181 { 1182 mlx5_lag_assert_locked_transition(ldev, filter); 1183 1184 /* Keep PF membership stable while ldev->lock is dropped. Device add 1185 * and remove paths observe mode_changes_in_progress and retry. 1186 */ 1187 ldev->mode_changes_in_progress++; 1188 mutex_unlock(&ldev->lock); 1189 } 1190 1191 static void mlx5_lag_retake_lock_after_reps(struct mlx5_lag *ldev) 1192 { 1193 mutex_lock(&ldev->lock); 1194 ldev->mode_changes_in_progress--; 1195 } 1196 1197 void mlx5_lag_rescan_dev_locked(struct mlx5_lag *ldev, 1198 struct mlx5_core_dev *dev, 1199 bool enable) 1200 { 1201 if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) 1202 return; 1203 1204 if (enable) 1205 dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 1206 else 1207 dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 1208 1209 /* Auxiliary bus probe/remove can register or unregister representor 1210 * callbacks and take reps_lock. Drop ldev->lock so the only ordering 1211 * remains reps_lock -> ldev->lock from representor callbacks. 1212 */ 1213 mlx5_lag_drop_lock_for_reps(ldev, mlx5_lag_get_filter(ldev, dev)); 1214 mlx5_rescan_drivers_locked(dev); 1215 mlx5_lag_retake_lock_after_reps(ldev); 1216 } 1217 1218 static void mlx5_lag_rescan_devices_locked_filter(struct mlx5_lag *ldev, 1219 bool enable, u32 filter) 1220 { 1221 struct mlx5_core_dev *devs[MLX5_MAX_PORTS]; 1222 struct lag_func *pf; 1223 int num_devs = 0; 1224 int i; 1225 1226 mlx5_lag_assert_locked_transition(ldev, filter); 1227 1228 mlx5_lag_for_each(i, 0, ldev, filter) { 1229 pf = mlx5_lag_pf(ldev, i); 1230 if (pf->dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) 1231 continue; 1232 1233 if (enable) 1234 pf->dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 1235 else 1236 pf->dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; 1237 devs[num_devs++] = pf->dev; 1238 } 1239 1240 mlx5_lag_drop_lock_for_reps(ldev, filter); 1241 for (i = 0; i < num_devs; i++) 1242 mlx5_rescan_drivers_locked(devs[i]); 1243 mlx5_lag_retake_lock_after_reps(ldev); 1244 } 1245 1246 void mlx5_lag_add_devices_filter(struct mlx5_lag *ldev, u32 filter) 1247 { 1248 mlx5_lag_rescan_devices_locked_filter(ldev, true, filter); 1249 } 1250 1251 void mlx5_lag_add_devices(struct mlx5_lag *ldev) 1252 { 1253 mlx5_lag_add_devices_filter(ldev, MLX5_LAG_FILTER_PORTS); 1254 } 1255 1256 void mlx5_lag_remove_devices_filter(struct mlx5_lag *ldev, u32 filter) 1257 { 1258 mlx5_lag_rescan_devices_locked_filter(ldev, false, filter); 1259 } 1260 1261 void mlx5_lag_remove_devices(struct mlx5_lag *ldev) 1262 { 1263 mlx5_lag_remove_devices_filter(ldev, MLX5_LAG_FILTER_PORTS); 1264 } 1265 1266 static int mlx5_lag_reload_ib_reps_unlocked(struct mlx5_lag *ldev, u32 flags, 1267 u32 filter, bool cont_on_fail) 1268 { 1269 struct lag_func *pf; 1270 int ret; 1271 int i; 1272 1273 mlx5_lag_for_each(i, 0, ldev, filter) { 1274 pf = mlx5_lag_pf(ldev, i); 1275 if (!(pf->dev->priv.flags & flags)) { 1276 struct mlx5_eswitch *esw; 1277 1278 esw = pf->dev->priv.eswitch; 1279 mlx5_esw_reps_block(esw); 1280 ret = mlx5_eswitch_reload_ib_reps(esw); 1281 mlx5_esw_reps_unblock(esw); 1282 if (ret && !cont_on_fail) 1283 return ret; 1284 } 1285 } 1286 1287 return 0; 1288 } 1289 1290 static int mlx5_lag_reload_ib_reps(struct mlx5_lag *ldev, u32 flags, 1291 u32 filter, bool cont_on_fail) 1292 { 1293 int ret; 1294 1295 /* The HCA devcom component lock serializes LAG mode transitions while 1296 * ldev->lock is dropped here. Dropping ldev->lock is required because 1297 * the reload takes the per-E-Switch reps_lock, and representor 1298 * load/unload callbacks can re-enter LAG netdev add/remove and take 1299 * ldev->lock. Keep the ordering reps_lock -> ldev->lock. 1300 */ 1301 mlx5_lag_drop_lock_for_reps(ldev, filter); 1302 ret = mlx5_lag_reload_ib_reps_unlocked(ldev, flags, filter, 1303 cont_on_fail); 1304 mlx5_lag_retake_lock_after_reps(ldev); 1305 1306 return ret; 1307 } 1308 1309 int mlx5_lag_reload_ib_reps_from_locked(struct mlx5_lag *ldev, u32 flags, 1310 u32 filter, bool cont_on_fail) 1311 { 1312 return mlx5_lag_reload_ib_reps(ldev, flags, filter, cont_on_fail); 1313 } 1314 1315 static void mlx5_lag_unload_reps_unlocked(struct mlx5_lag *ldev, u32 filter) 1316 { 1317 struct lag_func *pf; 1318 int i; 1319 1320 mlx5_lag_for_each(i, 0, ldev, filter) { 1321 struct mlx5_eswitch *esw; 1322 1323 pf = mlx5_lag_pf(ldev, i); 1324 esw = pf->dev->priv.eswitch; 1325 mlx5_esw_reps_block(esw); 1326 mlx5_eswitch_unload_reps(esw); 1327 mlx5_esw_reps_unblock(esw); 1328 } 1329 } 1330 1331 void mlx5_lag_unload_reps_from_locked(struct mlx5_lag *ldev, u32 filter) 1332 { 1333 /* Same lock dance as mlx5_lag_reload_ib_reps: drop ldev->lock around 1334 * the per-eswitch reps_lock to keep the reps_lock -> ldev->lock order. 1335 */ 1336 mlx5_lag_drop_lock_for_reps(ldev, filter); 1337 mlx5_lag_unload_reps_unlocked(ldev, filter); 1338 mlx5_lag_retake_lock_after_reps(ldev); 1339 } 1340 1341 void mlx5_disable_lag(struct mlx5_lag *ldev) 1342 { 1343 bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); 1344 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 1345 struct mlx5_core_dev *dev0; 1346 bool roce_lag; 1347 int err; 1348 int i; 1349 1350 if (idx < 0) 1351 return; 1352 1353 if (shared_fdb) { 1354 mlx5_lag_shared_fdb_destroy(ldev, 0); 1355 return; 1356 } 1357 1358 dev0 = mlx5_lag_pf(ldev, idx)->dev; 1359 roce_lag = __mlx5_lag_is_roce(ldev); 1360 1361 if (roce_lag) { 1362 mlx5_lag_rescan_dev_locked(ldev, dev0, false); 1363 mlx5_ldev_for_each(i, 0, ldev) { 1364 if (i == idx) 1365 continue; 1366 mlx5_nic_vport_disable_roce(mlx5_lag_pf(ldev, i)->dev); 1367 } 1368 } 1369 1370 err = mlx5_deactivate_lag(ldev); 1371 if (err) 1372 return; 1373 1374 if (roce_lag) 1375 mlx5_lag_add_devices(ldev); 1376 } 1377 1378 static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) 1379 { 1380 bool roce_lag = true; 1381 struct lag_func *pf; 1382 int i; 1383 1384 mlx5_ldev_for_each(i, 0, ldev) { 1385 pf = mlx5_lag_pf(ldev, i); 1386 roce_lag = roce_lag && !mlx5_sriov_is_enabled(pf->dev); 1387 } 1388 1389 #ifdef CONFIG_MLX5_ESWITCH 1390 mlx5_ldev_for_each(i, 0, ldev) { 1391 pf = mlx5_lag_pf(ldev, i); 1392 roce_lag = roce_lag && is_mdev_legacy_mode(pf->dev); 1393 } 1394 #endif 1395 1396 return roce_lag; 1397 } 1398 1399 static bool mlx5_lag_should_modify_lag(struct mlx5_lag *ldev, bool do_bond) 1400 { 1401 return do_bond && __mlx5_lag_is_active(ldev) && 1402 ldev->mode != MLX5_LAG_MODE_MPESW; 1403 } 1404 1405 static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) 1406 { 1407 return !do_bond && __mlx5_lag_is_active(ldev) && 1408 ldev->mode != MLX5_LAG_MODE_MPESW; 1409 } 1410 1411 #ifdef CONFIG_MLX5_ESWITCH 1412 static int 1413 mlx5_lag_sum_devices_speed(struct mlx5_lag *ldev, u32 *sum_speed, 1414 int (*get_speed)(struct mlx5_core_dev *, u32 *)) 1415 { 1416 struct mlx5_core_dev *pf_mdev; 1417 struct lag_func *pf; 1418 int pf_idx; 1419 u32 speed; 1420 int ret; 1421 1422 *sum_speed = 0; 1423 mlx5_ldev_for_each(pf_idx, 0, ldev) { 1424 pf = mlx5_lag_pf(ldev, pf_idx); 1425 if (!pf) 1426 continue; 1427 pf_mdev = pf->dev; 1428 if (!pf_mdev) 1429 continue; 1430 1431 ret = get_speed(pf_mdev, &speed); 1432 if (ret) { 1433 mlx5_core_dbg(pf_mdev, 1434 "Failed to get device speed using %ps. Device %s speed is not available (err=%d)\n", 1435 get_speed, dev_name(pf_mdev->device), 1436 ret); 1437 return ret; 1438 } 1439 1440 *sum_speed += speed; 1441 } 1442 1443 return 0; 1444 } 1445 1446 static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed) 1447 { 1448 return mlx5_lag_sum_devices_speed(ldev, max_speed, 1449 mlx5_port_max_linkspeed); 1450 } 1451 1452 static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev, 1453 u32 *oper_speed) 1454 { 1455 return mlx5_lag_sum_devices_speed(ldev, oper_speed, 1456 mlx5_port_oper_linkspeed); 1457 } 1458 1459 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev, 1460 u32 speed) 1461 { 1462 u16 op_mod = MLX5_VPORT_STATE_OP_MOD_ESW_VPORT; 1463 struct mlx5_eswitch *esw = mdev->priv.eswitch; 1464 struct mlx5_vport *vport; 1465 unsigned long i; 1466 int ret; 1467 1468 if (!esw) 1469 return; 1470 1471 if (!MLX5_CAP_ESW(mdev, esw_vport_state_max_tx_speed)) 1472 return; 1473 1474 mlx5_esw_for_each_vport(esw, i, vport) { 1475 if (!vport) 1476 continue; 1477 1478 if (vport->vport == MLX5_VPORT_UPLINK) 1479 continue; 1480 1481 vport->agg_max_tx_speed = speed; 1482 1483 if (!vport->enabled) 1484 continue; 1485 1486 ret = mlx5_modify_vport_max_tx_speed(mdev, op_mod, 1487 vport->vport, true, speed); 1488 if (ret) 1489 mlx5_core_dbg(mdev, 1490 "Failed to set vport %d speed %d, err=%d\n", 1491 vport->vport, speed, ret); 1492 } 1493 } 1494 1495 void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev) 1496 { 1497 struct mlx5_core_dev *mdev; 1498 struct lag_func *pf; 1499 u32 speed; 1500 int pf_idx; 1501 1502 if (ldev->mode == MLX5_LAG_MODE_MPESW) { 1503 if (mlx5_lag_sum_devices_oper_speed(ldev, &speed)) 1504 return; 1505 } else { 1506 speed = ldev->tracker.bond_speed_mbps; 1507 if (speed == SPEED_UNKNOWN) 1508 return; 1509 } 1510 1511 /* If speed is not set, use the sum of max speeds of all PFs */ 1512 if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed)) 1513 return; 1514 1515 speed = speed / MLX5_MAX_TX_SPEED_UNIT; 1516 1517 mlx5_ldev_for_each(pf_idx, 0, ldev) { 1518 pf = mlx5_lag_pf(ldev, pf_idx); 1519 if (!pf) 1520 continue; 1521 mdev = pf->dev; 1522 if (!mdev) 1523 continue; 1524 1525 mlx5_lag_modify_device_vports_speed(mdev, speed); 1526 } 1527 } 1528 1529 void mlx5_lag_reset_vports_speed(struct mlx5_lag *ldev) 1530 { 1531 struct mlx5_core_dev *mdev; 1532 struct lag_func *pf; 1533 u32 speed; 1534 int pf_idx; 1535 int ret; 1536 1537 mlx5_ldev_for_each(pf_idx, 0, ldev) { 1538 pf = mlx5_lag_pf(ldev, pf_idx); 1539 if (!pf) 1540 continue; 1541 mdev = pf->dev; 1542 if (!mdev) 1543 continue; 1544 1545 ret = mlx5_port_oper_linkspeed(mdev, &speed); 1546 if (ret) { 1547 mlx5_core_dbg(mdev, 1548 "Failed to reset vports speed for device %s. Oper speed is not available (err=%d)\n", 1549 dev_name(mdev->device), ret); 1550 continue; 1551 } 1552 1553 speed = speed / MLX5_MAX_TX_SPEED_UNIT; 1554 mlx5_lag_modify_device_vports_speed(mdev, speed); 1555 } 1556 } 1557 #endif 1558 1559 static void mlx5_do_bond(struct mlx5_lag *ldev) 1560 { 1561 int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); 1562 struct lag_tracker tracker = { }; 1563 struct mlx5_core_dev *dev0; 1564 struct net_device *ndev; 1565 bool do_bond, roce_lag; 1566 int err; 1567 int i; 1568 1569 if (idx < 0) 1570 return; 1571 1572 dev0 = mlx5_lag_pf(ldev, idx)->dev; 1573 if (!mlx5_lag_is_ready(ldev)) { 1574 do_bond = false; 1575 } else { 1576 /* VF LAG is in multipath mode, ignore bond change requests */ 1577 if (mlx5_lag_is_multipath(dev0)) 1578 return; 1579 1580 tracker = ldev->tracker; 1581 1582 do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); 1583 } 1584 1585 if (do_bond && !__mlx5_lag_is_active(ldev)) { 1586 bool shared_fdb = mlx5_lag_shared_fdb_supported(ldev); 1587 1588 roce_lag = mlx5_lag_is_roce_lag(ldev); 1589 1590 if (shared_fdb) { 1591 err = mlx5_lag_shared_fdb_create(ldev, &tracker, 1592 MLX5_LAG_MODE_SRIOV, 1593 0); 1594 if (err) 1595 return; 1596 } else { 1597 if (roce_lag) 1598 mlx5_lag_remove_devices(ldev); 1599 1600 err = mlx5_activate_lag(ldev, &tracker, 1601 roce_lag ? MLX5_LAG_MODE_ROCE : 1602 MLX5_LAG_MODE_SRIOV, 1603 false); 1604 if (err) { 1605 if (roce_lag) 1606 mlx5_lag_add_devices(ldev); 1607 return; 1608 } 1609 1610 if (roce_lag) { 1611 struct mlx5_core_dev *dev; 1612 1613 mlx5_lag_rescan_dev_locked(ldev, dev0, true); 1614 mlx5_ldev_for_each(i, 0, ldev) { 1615 if (i == idx) 1616 continue; 1617 dev = mlx5_lag_pf(ldev, i)->dev; 1618 if (mlx5_get_roce_state(dev)) 1619 mlx5_nic_vport_enable_roce(dev); 1620 } 1621 } 1622 } 1623 if (tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { 1624 ndev = mlx5_lag_active_backup_get_netdev(dev0); 1625 /** Only sriov and roce lag should have tracker->TX_type 1626 * set so no need to check the mode 1627 */ 1628 blocking_notifier_call_chain(&dev0->priv.lag_nh, 1629 MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE, 1630 ndev); 1631 dev_put(ndev); 1632 } 1633 if (!shared_fdb) 1634 mlx5_lag_set_vports_agg_speed(ldev); 1635 } else if (mlx5_lag_should_modify_lag(ldev, do_bond)) { 1636 mlx5_modify_lag(ldev, &tracker); 1637 mlx5_lag_set_vports_agg_speed(ldev); 1638 } else if (mlx5_lag_should_disable_lag(ldev, do_bond)) { 1639 mlx5_lag_reset_vports_speed(ldev); 1640 mlx5_disable_lag(ldev); 1641 } 1642 } 1643 1644 /* The last mdev to unregister will destroy the workqueue before removing the 1645 * devcom component, and as all the mdevs use the same devcom component we are 1646 * guaranteed that the devcom is valid while the calling work is running. 1647 */ 1648 struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev) 1649 { 1650 struct mlx5_devcom_comp_dev *devcom = NULL; 1651 struct lag_func *pf; 1652 int i; 1653 1654 mutex_lock(&ldev->lock); 1655 i = mlx5_get_next_lag_func(ldev, 0, MLX5_LAG_FILTER_PORTS); 1656 if (i < MLX5_MAX_PORTS) { 1657 pf = mlx5_lag_pf(ldev, i); 1658 devcom = pf->dev->priv.hca_devcom_comp; 1659 } 1660 mutex_unlock(&ldev->lock); 1661 return devcom; 1662 } 1663 1664 static int mlx5_lag_demux_ft_fg_init(struct mlx5_core_dev *dev, 1665 struct mlx5_flow_table_attr *ft_attr, 1666 struct lag_func *pf) 1667 { 1668 #ifdef CONFIG_MLX5_ESWITCH 1669 struct mlx5_flow_namespace *ns; 1670 struct mlx5_flow_group *fg; 1671 int err; 1672 1673 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG); 1674 if (!ns) 1675 return 0; 1676 1677 pf->lag_demux_ft = mlx5_create_flow_table(ns, ft_attr); 1678 if (IS_ERR(pf->lag_demux_ft)) 1679 return PTR_ERR(pf->lag_demux_ft); 1680 1681 fg = mlx5_esw_lag_demux_fg_create(dev->priv.eswitch, 1682 pf->lag_demux_ft); 1683 if (IS_ERR(fg)) { 1684 err = PTR_ERR(fg); 1685 mlx5_destroy_flow_table(pf->lag_demux_ft); 1686 pf->lag_demux_ft = NULL; 1687 return err; 1688 } 1689 1690 pf->lag_demux_fg = fg; 1691 return 0; 1692 #else 1693 return -EOPNOTSUPP; 1694 #endif 1695 } 1696 1697 static int mlx5_lag_demux_fw_init(struct mlx5_core_dev *dev, 1698 struct mlx5_flow_table_attr *ft_attr, 1699 struct lag_func *pf) 1700 { 1701 struct mlx5_flow_namespace *ns; 1702 int err; 1703 1704 ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_LAG); 1705 if (!ns) 1706 return 0; 1707 1708 pf->lag_demux_fg = NULL; 1709 ft_attr->max_fte = 1; 1710 pf->lag_demux_ft = mlx5_create_lag_demux_flow_table(ns, ft_attr); 1711 if (IS_ERR(pf->lag_demux_ft)) { 1712 err = PTR_ERR(pf->lag_demux_ft); 1713 pf->lag_demux_ft = NULL; 1714 return err; 1715 } 1716 1717 return 0; 1718 } 1719 1720 int mlx5_lag_demux_init(struct mlx5_core_dev *dev, 1721 struct mlx5_flow_table_attr *ft_attr) 1722 { 1723 struct mlx5_lag *ldev; 1724 struct lag_func *pf; 1725 1726 if (!ft_attr) 1727 return -EINVAL; 1728 1729 ldev = mlx5_lag_dev(dev); 1730 if (!ldev) 1731 return -ENODEV; 1732 1733 pf = mlx5_lag_pf_by_dev(ldev, dev); 1734 if (!pf) 1735 return -ENODEV; 1736 1737 xa_init(&pf->lag_demux_rules); 1738 1739 if (mlx5_lag_is_sw_lag(dev)) 1740 return mlx5_lag_demux_ft_fg_init(dev, ft_attr, pf); 1741 1742 return mlx5_lag_demux_fw_init(dev, ft_attr, pf); 1743 } 1744 EXPORT_SYMBOL(mlx5_lag_demux_init); 1745 1746 void mlx5_lag_demux_cleanup(struct mlx5_core_dev *dev) 1747 { 1748 struct mlx5_flow_handle *rule; 1749 struct mlx5_lag *ldev; 1750 unsigned long vport_num; 1751 struct lag_func *pf; 1752 1753 ldev = mlx5_lag_dev(dev); 1754 if (!ldev) 1755 return; 1756 1757 pf = mlx5_lag_pf_by_dev(ldev, dev); 1758 if (!pf) 1759 return; 1760 1761 xa_for_each(&pf->lag_demux_rules, vport_num, rule) 1762 mlx5_del_flow_rules(rule); 1763 xa_destroy(&pf->lag_demux_rules); 1764 1765 if (pf->lag_demux_fg) 1766 mlx5_destroy_flow_group(pf->lag_demux_fg); 1767 if (pf->lag_demux_ft) 1768 mlx5_destroy_flow_table(pf->lag_demux_ft); 1769 pf->lag_demux_fg = NULL; 1770 pf->lag_demux_ft = NULL; 1771 } 1772 EXPORT_SYMBOL(mlx5_lag_demux_cleanup); 1773 1774 static struct lag_func *mlx5_lag_dev_get_master_pf(struct mlx5_lag *ldev, 1775 struct mlx5_core_dev *dev) 1776 { 1777 u32 filter = mlx5_lag_get_filter(ldev, dev); 1778 int idx; 1779 1780 idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1, filter); 1781 if (idx < 0) 1782 return NULL; 1783 1784 return mlx5_lag_pf(ldev, idx); 1785 } 1786 1787 int mlx5_lag_demux_rule_add(struct mlx5_core_dev *vport_dev, u16 vport_num, 1788 int index) 1789 { 1790 struct mlx5_flow_handle *rule; 1791 struct lag_func *master; 1792 struct mlx5_lag *ldev; 1793 int err; 1794 1795 ldev = mlx5_lag_dev(vport_dev); 1796 if (!ldev) 1797 return 0; 1798 1799 master = mlx5_lag_dev_get_master_pf(ldev, vport_dev); 1800 if (!master || !master->lag_demux_fg) 1801 return 0; 1802 1803 if (xa_load(&master->lag_demux_rules, index)) 1804 return 0; 1805 1806 rule = mlx5_esw_lag_demux_rule_create(vport_dev->priv.eswitch, 1807 vport_num, master->lag_demux_ft); 1808 if (IS_ERR(rule)) { 1809 err = PTR_ERR(rule); 1810 mlx5_core_warn(vport_dev, 1811 "Failed to create LAG demux rule for vport %u, err %d\n", 1812 vport_num, err); 1813 return err; 1814 } 1815 1816 err = xa_err(xa_store(&master->lag_demux_rules, index, rule, 1817 GFP_KERNEL)); 1818 if (err) { 1819 mlx5_del_flow_rules(rule); 1820 mlx5_core_warn(vport_dev, 1821 "Failed to store LAG demux rule for vport %u, err %d\n", 1822 vport_num, err); 1823 } 1824 1825 return err; 1826 } 1827 EXPORT_SYMBOL(mlx5_lag_demux_rule_add); 1828 1829 void mlx5_lag_demux_rule_del(struct mlx5_core_dev *dev, int index) 1830 { 1831 struct mlx5_flow_handle *rule; 1832 struct lag_func *master_pf; 1833 struct mlx5_lag *ldev; 1834 1835 ldev = mlx5_lag_dev(dev); 1836 if (!ldev) 1837 return; 1838 1839 master_pf = mlx5_lag_dev_get_master_pf(ldev, dev); 1840 if (!master_pf || !master_pf->lag_demux_fg) 1841 return; 1842 1843 rule = xa_erase(&master_pf->lag_demux_rules, index); 1844 if (rule) 1845 mlx5_del_flow_rules(rule); 1846 } 1847 EXPORT_SYMBOL(mlx5_lag_demux_rule_del); 1848 1849 static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) 1850 { 1851 queue_delayed_work(ldev->wq, &ldev->bond_work, delay); 1852 } 1853 1854 static void mlx5_do_bond_work(struct work_struct *work) 1855 { 1856 struct delayed_work *delayed_work = to_delayed_work(work); 1857 struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, 1858 bond_work); 1859 struct mlx5_devcom_comp_dev *devcom; 1860 int status; 1861 1862 devcom = mlx5_lag_get_devcom_comp(ldev); 1863 if (!devcom) 1864 return; 1865 1866 status = mlx5_devcom_comp_trylock(devcom); 1867 if (!status) { 1868 mlx5_queue_bond_work(ldev, HZ); 1869 return; 1870 } 1871 1872 mutex_lock(&ldev->lock); 1873 if (ldev->mode_changes_in_progress) { 1874 mutex_unlock(&ldev->lock); 1875 mlx5_devcom_comp_unlock(devcom); 1876 mlx5_queue_bond_work(ldev, HZ); 1877 return; 1878 } 1879 1880 mlx5_do_bond(ldev); 1881 mutex_unlock(&ldev->lock); 1882 mlx5_devcom_comp_unlock(devcom); 1883 } 1884 1885 static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, 1886 struct lag_tracker *tracker, 1887 struct netdev_notifier_changeupper_info *info) 1888 { 1889 struct net_device *upper = info->upper_dev, *ndev_tmp; 1890 struct netdev_lag_upper_info *lag_upper_info = NULL; 1891 bool is_bonded, is_in_lag, mode_supported; 1892 bool has_inactive = 0; 1893 struct lag_func *pf; 1894 struct slave *slave; 1895 u8 bond_status = 0; 1896 int num_slaves = 0; 1897 int changed = 0; 1898 int i, idx = -1; 1899 1900 if (!netif_is_lag_master(upper)) 1901 return 0; 1902 1903 if (info->linking) 1904 lag_upper_info = info->upper_info; 1905 1906 /* The event may still be of interest if the slave does not belong to 1907 * us, but is enslaved to a master which has one or more of our netdevs 1908 * as slaves (e.g., if a new slave is added to a master that bonds two 1909 * of our netdevs, we should unbond). 1910 */ 1911 rcu_read_lock(); 1912 for_each_netdev_in_bond_rcu(upper, ndev_tmp) { 1913 mlx5_ldev_for_each(i, 0, ldev) { 1914 pf = mlx5_lag_pf(ldev, i); 1915 if (pf->netdev == ndev_tmp) { 1916 idx++; 1917 break; 1918 } 1919 } 1920 if (i < MLX5_MAX_PORTS) { 1921 slave = bond_slave_get_rcu(ndev_tmp); 1922 if (slave) 1923 has_inactive |= bond_is_slave_inactive(slave); 1924 bond_status |= (1 << idx); 1925 } 1926 1927 num_slaves++; 1928 } 1929 rcu_read_unlock(); 1930 1931 /* None of this lagdev's netdevs are slaves of this master. */ 1932 if (!(bond_status & GENMASK(ldev->ports - 1, 0))) 1933 return 0; 1934 1935 if (lag_upper_info) { 1936 tracker->tx_type = lag_upper_info->tx_type; 1937 tracker->hash_type = lag_upper_info->hash_type; 1938 } 1939 1940 tracker->has_inactive = has_inactive; 1941 /* Determine bonding status: 1942 * A device is considered bonded if both its physical ports are slaves 1943 * of the same lag master, and only them. 1944 */ 1945 is_in_lag = num_slaves == ldev->ports && 1946 bond_status == GENMASK(ldev->ports - 1, 0); 1947 1948 /* Lag mode must be activebackup or hash. */ 1949 mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || 1950 tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; 1951 1952 is_bonded = is_in_lag && mode_supported; 1953 if (tracker->is_bonded != is_bonded) { 1954 tracker->is_bonded = is_bonded; 1955 changed = 1; 1956 } 1957 1958 if (!is_in_lag) 1959 return changed; 1960 1961 if (!mlx5_lag_is_ready(ldev)) 1962 NL_SET_ERR_MSG_MOD(info->info.extack, 1963 "Can't activate LAG offload, PF is configured with more than 64 VFs"); 1964 else if (!mode_supported) 1965 NL_SET_ERR_MSG_MOD(info->info.extack, 1966 "Can't activate LAG offload, TX type isn't supported"); 1967 1968 return changed; 1969 } 1970 1971 static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, 1972 struct lag_tracker *tracker, 1973 struct net_device *ndev, 1974 struct netdev_notifier_changelowerstate_info *info) 1975 { 1976 struct netdev_lag_lower_state_info *lag_lower_info; 1977 int idx; 1978 1979 if (!netif_is_lag_port(ndev)) 1980 return 0; 1981 1982 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); 1983 if (idx < 0) 1984 return 0; 1985 1986 /* This information is used to determine virtual to physical 1987 * port mapping. 1988 */ 1989 lag_lower_info = info->lower_state_info; 1990 if (!lag_lower_info) 1991 return 0; 1992 1993 tracker->netdev_state[idx] = *lag_lower_info; 1994 1995 return 1; 1996 } 1997 1998 static int mlx5_handle_changeinfodata_event(struct mlx5_lag *ldev, 1999 struct lag_tracker *tracker, 2000 struct net_device *ndev) 2001 { 2002 struct net_device *ndev_tmp; 2003 struct slave *slave; 2004 bool has_inactive = 0; 2005 int idx; 2006 2007 if (!netif_is_lag_master(ndev)) 2008 return 0; 2009 2010 rcu_read_lock(); 2011 for_each_netdev_in_bond_rcu(ndev, ndev_tmp) { 2012 idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); 2013 if (idx < 0) 2014 continue; 2015 2016 slave = bond_slave_get_rcu(ndev_tmp); 2017 if (slave) 2018 has_inactive |= bond_is_slave_inactive(slave); 2019 } 2020 rcu_read_unlock(); 2021 2022 if (tracker->has_inactive == has_inactive) 2023 return 0; 2024 2025 tracker->has_inactive = has_inactive; 2026 2027 return 1; 2028 } 2029 2030 static void mlx5_lag_update_tracker_speed(struct lag_tracker *tracker, 2031 struct net_device *ndev) 2032 { 2033 struct ethtool_link_ksettings lksettings; 2034 struct net_device *bond_dev; 2035 int err; 2036 2037 if (netif_is_lag_master(ndev)) 2038 bond_dev = ndev; 2039 else 2040 bond_dev = netdev_master_upper_dev_get(ndev); 2041 2042 if (!bond_dev) { 2043 tracker->bond_speed_mbps = SPEED_UNKNOWN; 2044 return; 2045 } 2046 2047 err = __ethtool_get_link_ksettings(bond_dev, &lksettings); 2048 if (err) { 2049 netdev_dbg(bond_dev, 2050 "Failed to get speed for bond dev %s, err=%d\n", 2051 bond_dev->name, err); 2052 tracker->bond_speed_mbps = SPEED_UNKNOWN; 2053 return; 2054 } 2055 2056 if (lksettings.base.speed == SPEED_UNKNOWN) 2057 tracker->bond_speed_mbps = 0; 2058 else 2059 tracker->bond_speed_mbps = lksettings.base.speed; 2060 } 2061 2062 /* Returns speed in Mbps. */ 2063 int mlx5_lag_query_bond_speed(struct mlx5_core_dev *mdev, u32 *speed) 2064 { 2065 struct mlx5_lag *ldev; 2066 unsigned long flags; 2067 int ret = 0; 2068 2069 spin_lock_irqsave(&lag_lock, flags); 2070 ldev = mlx5_lag_dev(mdev); 2071 if (!ldev) { 2072 ret = -ENODEV; 2073 goto unlock; 2074 } 2075 2076 *speed = ldev->tracker.bond_speed_mbps; 2077 2078 if (*speed == SPEED_UNKNOWN) { 2079 mlx5_core_dbg(mdev, "Bond speed is unknown\n"); 2080 ret = -EINVAL; 2081 } 2082 2083 unlock: 2084 spin_unlock_irqrestore(&lag_lock, flags); 2085 return ret; 2086 } 2087 EXPORT_SYMBOL_GPL(mlx5_lag_query_bond_speed); 2088 2089 /* this handler is always registered to netdev events */ 2090 static int mlx5_lag_netdev_event(struct notifier_block *this, 2091 unsigned long event, void *ptr) 2092 { 2093 struct net_device *ndev = netdev_notifier_info_to_dev(ptr); 2094 struct lag_tracker tracker; 2095 struct mlx5_lag *ldev; 2096 int changed = 0; 2097 2098 if (event != NETDEV_CHANGEUPPER && 2099 event != NETDEV_CHANGELOWERSTATE && 2100 event != NETDEV_CHANGEINFODATA) 2101 return NOTIFY_DONE; 2102 2103 ldev = container_of(this, struct mlx5_lag, nb); 2104 2105 tracker = ldev->tracker; 2106 2107 switch (event) { 2108 case NETDEV_CHANGEUPPER: 2109 changed = mlx5_handle_changeupper_event(ldev, &tracker, ptr); 2110 break; 2111 case NETDEV_CHANGELOWERSTATE: 2112 changed = mlx5_handle_changelowerstate_event(ldev, &tracker, 2113 ndev, ptr); 2114 break; 2115 case NETDEV_CHANGEINFODATA: 2116 changed = mlx5_handle_changeinfodata_event(ldev, &tracker, ndev); 2117 break; 2118 } 2119 2120 if (changed) 2121 mlx5_lag_update_tracker_speed(&tracker, ndev); 2122 2123 ldev->tracker = tracker; 2124 2125 if (changed) 2126 mlx5_queue_bond_work(ldev, 0); 2127 2128 return NOTIFY_DONE; 2129 } 2130 2131 static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, 2132 struct mlx5_core_dev *dev, 2133 struct net_device *netdev) 2134 { 2135 struct lag_func *pf; 2136 unsigned long flags; 2137 int i; 2138 2139 spin_lock_irqsave(&lag_lock, flags); 2140 /* Find pf entry by matching dev pointer */ 2141 mlx5_ldev_for_each(i, 0, ldev) { 2142 pf = mlx5_lag_pf(ldev, i); 2143 if (pf->dev == dev) { 2144 pf->netdev = netdev; 2145 ldev->tracker.netdev_state[i].link_up = 0; 2146 ldev->tracker.netdev_state[i].tx_enabled = 0; 2147 break; 2148 } 2149 } 2150 spin_unlock_irqrestore(&lag_lock, flags); 2151 } 2152 2153 static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, 2154 struct net_device *netdev) 2155 { 2156 struct lag_func *pf; 2157 unsigned long flags; 2158 int i; 2159 2160 spin_lock_irqsave(&lag_lock, flags); 2161 mlx5_ldev_for_each(i, 0, ldev) { 2162 pf = mlx5_lag_pf(ldev, i); 2163 if (pf->netdev == netdev) { 2164 pf->netdev = NULL; 2165 break; 2166 } 2167 } 2168 spin_unlock_irqrestore(&lag_lock, flags); 2169 } 2170 2171 int mlx5_ldev_add_mdev(struct mlx5_lag *ldev, 2172 struct mlx5_core_dev *dev, 2173 u32 group_id) 2174 { 2175 struct lag_func *pf; 2176 u32 idx; 2177 int err; 2178 2179 pf = kzalloc_obj(*pf); 2180 if (!pf) 2181 return -ENOMEM; 2182 2183 err = xa_alloc(&ldev->pfs, &idx, pf, XA_LIMIT(0, MLX5_MAX_PORTS - 1), 2184 GFP_KERNEL); 2185 if (err) { 2186 kfree(pf); 2187 return err; 2188 } 2189 2190 pf->idx = idx; 2191 pf->dev = dev; 2192 pf->group_id = group_id; 2193 dev->priv.lag = ldev; 2194 2195 if (group_id) 2196 return 0; 2197 2198 xa_set_mark(&ldev->pfs, idx, MLX5_LAG_XA_MARK_PORT); 2199 2200 MLX5_NB_INIT(&pf->port_change_nb, 2201 mlx5_lag_mpesw_port_change_event, PORT_CHANGE); 2202 mlx5_eq_notifier_register(dev, &pf->port_change_nb); 2203 2204 return 0; 2205 } 2206 2207 void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, 2208 struct mlx5_core_dev *dev) 2209 { 2210 struct lag_func *pf; 2211 int i; 2212 2213 mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) { 2214 pf = mlx5_lag_pf(ldev, i); 2215 if (pf->dev == dev) 2216 break; 2217 } 2218 if (i >= MLX5_MAX_PORTS) 2219 return; 2220 2221 if (pf->port_change_nb.nb.notifier_call) 2222 mlx5_eq_notifier_unregister(dev, &pf->port_change_nb); 2223 2224 pf->dev = NULL; 2225 dev->priv.lag = NULL; 2226 xa_erase(&ldev->pfs, pf->idx); 2227 kfree(pf); 2228 } 2229 2230 /* Must be called with HCA devcom component lock held */ 2231 static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) 2232 { 2233 struct mlx5_devcom_comp_dev *pos = NULL; 2234 struct mlx5_lag *ldev = NULL; 2235 struct mlx5_core_dev *tmp_dev; 2236 int err; 2237 2238 tmp_dev = mlx5_devcom_get_next_peer_data(dev->priv.hca_devcom_comp, &pos); 2239 if (tmp_dev) 2240 ldev = mlx5_lag_dev(tmp_dev); 2241 2242 if (!ldev) { 2243 ldev = mlx5_lag_dev_alloc(dev); 2244 if (!ldev) { 2245 mlx5_core_err(dev, "Failed to alloc lag dev\n"); 2246 return 0; 2247 } 2248 err = mlx5_ldev_add_mdev(ldev, dev, 0); 2249 if (err) { 2250 mlx5_core_err(dev, "Failed to add mdev to lag dev\n"); 2251 mlx5_ldev_put(ldev); 2252 return 0; 2253 } 2254 return 0; 2255 } 2256 2257 mutex_lock(&ldev->lock); 2258 if (ldev->mode_changes_in_progress) { 2259 mutex_unlock(&ldev->lock); 2260 return -EAGAIN; 2261 } 2262 mlx5_ldev_get(ldev); 2263 err = mlx5_ldev_add_mdev(ldev, dev, 0); 2264 if (err) { 2265 mlx5_ldev_put(ldev); 2266 mutex_unlock(&ldev->lock); 2267 return err; 2268 } 2269 mutex_unlock(&ldev->lock); 2270 2271 return 0; 2272 } 2273 2274 static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev) 2275 { 2276 mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp); 2277 dev->priv.hca_devcom_comp = NULL; 2278 } 2279 2280 static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) 2281 { 2282 struct mlx5_devcom_match_attr attr = { 2283 .flags = MLX5_DEVCOM_MATCH_FLAGS_NS, 2284 .net = mlx5_core_net(dev), 2285 }; 2286 u8 len __always_unused; 2287 2288 mlx5_query_nic_sw_system_image_guid(dev, attr.key.buf, &len); 2289 2290 /* This component is use to sync adding core_dev to lag_dev and to sync 2291 * changes of mlx5_adev_devices between LAG layer and other layers. 2292 */ 2293 dev->priv.hca_devcom_comp = 2294 mlx5_devcom_register_component(dev->priv.devc, 2295 MLX5_DEVCOM_HCA_PORTS, 2296 &attr, mlx5_lag_devcom_event, 2297 dev); 2298 if (!dev->priv.hca_devcom_comp) { 2299 mlx5_core_err(dev, 2300 "Failed to register devcom HCA component."); 2301 return -EINVAL; 2302 } 2303 2304 return 0; 2305 } 2306 2307 void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) 2308 { 2309 struct mlx5_lag *ldev; 2310 2311 ldev = mlx5_lag_dev(dev); 2312 if (!ldev) 2313 return; 2314 2315 /* mdev is being removed, might as well remove debugfs 2316 * as early as possible. 2317 */ 2318 mlx5_ldev_remove_debugfs(dev->priv.dbg.lag_debugfs); 2319 recheck: 2320 mutex_lock(&ldev->lock); 2321 if (ldev->mode_changes_in_progress) { 2322 mutex_unlock(&ldev->lock); 2323 msleep(100); 2324 goto recheck; 2325 } 2326 mlx5_ldev_remove_mdev(ldev, dev); 2327 mutex_unlock(&ldev->lock); 2328 /* Send devcom event to notify peers that a device is being removed */ 2329 mlx5_devcom_send_event(dev->priv.hca_devcom_comp, 2330 LAG_DEVCOM_UNPAIR, LAG_DEVCOM_UNPAIR, dev); 2331 mlx5_lag_unregister_hca_devcom_comp(dev); 2332 mlx5_ldev_put(ldev); 2333 } 2334 2335 void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) 2336 { 2337 int err; 2338 2339 if (!mlx5_lag_is_supported(dev)) 2340 return; 2341 2342 if (mlx5_lag_register_hca_devcom_comp(dev)) 2343 return; 2344 2345 recheck: 2346 mlx5_devcom_comp_lock(dev->priv.hca_devcom_comp); 2347 err = __mlx5_lag_dev_add_mdev(dev); 2348 mlx5_devcom_comp_unlock(dev->priv.hca_devcom_comp); 2349 2350 if (err) { 2351 msleep(100); 2352 goto recheck; 2353 } 2354 /* Send devcom event to notify peers that a device was added */ 2355 mlx5_devcom_send_event(dev->priv.hca_devcom_comp, 2356 LAG_DEVCOM_PAIR, LAG_DEVCOM_UNPAIR, dev); 2357 mlx5_ldev_add_debugfs(dev); 2358 } 2359 2360 void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, 2361 struct net_device *netdev) 2362 { 2363 struct mlx5_lag *ldev; 2364 bool lag_is_active; 2365 2366 ldev = mlx5_lag_dev(dev); 2367 if (!ldev) 2368 return; 2369 2370 mutex_lock(&ldev->lock); 2371 mlx5_ldev_remove_netdev(ldev, netdev); 2372 clear_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); 2373 2374 lag_is_active = __mlx5_lag_is_active(ldev); 2375 mutex_unlock(&ldev->lock); 2376 2377 if (lag_is_active) 2378 mlx5_queue_bond_work(ldev, 0); 2379 } 2380 2381 void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, 2382 struct net_device *netdev) 2383 { 2384 struct mlx5_lag *ldev; 2385 int num = 0; 2386 2387 ldev = mlx5_lag_dev(dev); 2388 if (!ldev) 2389 return; 2390 2391 mutex_lock(&ldev->lock); 2392 mlx5_ldev_add_netdev(ldev, dev, netdev); 2393 num = mlx5_lag_num_netdevs(ldev); 2394 if (num >= ldev->ports) 2395 set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); 2396 mutex_unlock(&ldev->lock); 2397 mlx5_queue_bond_work(ldev, 0); 2398 } 2399 2400 int mlx5_get_pre_lag_func(struct mlx5_lag *ldev, int start_idx, int end_idx, 2401 u32 filter) 2402 { 2403 struct lag_func *pf; 2404 int i; 2405 2406 for (i = start_idx; i >= end_idx; i--) { 2407 pf = xa_load(&ldev->pfs, i); 2408 if (!pf || !pf->dev) 2409 continue; 2410 if (filter == MLX5_LAG_FILTER_PORTS) { 2411 if (xa_get_mark(&ldev->pfs, i, MLX5_LAG_XA_MARK_PORT)) 2412 return i; 2413 } else if (filter == MLX5_LAG_FILTER_ALL || 2414 filter == pf->group_id) { 2415 return i; 2416 } 2417 } 2418 return -1; 2419 } 2420 2421 int mlx5_get_next_lag_func(struct mlx5_lag *ldev, int start_idx, u32 filter) 2422 { 2423 struct lag_func *pf; 2424 unsigned long idx; 2425 2426 if (filter == MLX5_LAG_FILTER_PORTS) { 2427 xa_for_each_marked_start(&ldev->pfs, idx, pf, 2428 MLX5_LAG_XA_MARK_PORT, start_idx) 2429 if (pf->dev) 2430 return idx; 2431 return MLX5_MAX_PORTS; 2432 } 2433 2434 xa_for_each_start(&ldev->pfs, idx, pf, start_idx) { 2435 if (!pf->dev) 2436 continue; 2437 if (filter == MLX5_LAG_FILTER_ALL || 2438 filter == pf->group_id) 2439 return idx; 2440 } 2441 return MLX5_MAX_PORTS; 2442 } 2443 2444 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) 2445 { 2446 struct mlx5_lag *ldev; 2447 unsigned long flags; 2448 bool res; 2449 2450 spin_lock_irqsave(&lag_lock, flags); 2451 ldev = mlx5_lag_dev(dev); 2452 res = ldev && __mlx5_lag_is_roce(ldev); 2453 spin_unlock_irqrestore(&lag_lock, flags); 2454 2455 return res; 2456 } 2457 EXPORT_SYMBOL(mlx5_lag_is_roce); 2458 2459 bool mlx5_lag_is_active(struct mlx5_core_dev *dev) 2460 { 2461 struct mlx5_lag *ldev; 2462 unsigned long flags; 2463 bool res; 2464 2465 spin_lock_irqsave(&lag_lock, flags); 2466 ldev = mlx5_lag_dev(dev); 2467 res = ldev && (__mlx5_lag_is_active(ldev) || 2468 __mlx5_lag_is_sd_active(ldev, dev)); 2469 spin_unlock_irqrestore(&lag_lock, flags); 2470 2471 return res; 2472 } 2473 EXPORT_SYMBOL(mlx5_lag_is_active); 2474 2475 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev) 2476 { 2477 struct mlx5_lag *ldev; 2478 unsigned long flags; 2479 bool res = 0; 2480 2481 spin_lock_irqsave(&lag_lock, flags); 2482 ldev = mlx5_lag_dev(dev); 2483 if (ldev) 2484 res = test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags); 2485 spin_unlock_irqrestore(&lag_lock, flags); 2486 2487 return res; 2488 } 2489 EXPORT_SYMBOL(mlx5_lag_mode_is_hash); 2490 2491 bool mlx5_lag_is_master(struct mlx5_core_dev *dev) 2492 { 2493 struct mlx5_lag *ldev; 2494 unsigned long flags; 2495 struct lag_func *pf; 2496 bool res = false; 2497 int idx; 2498 2499 spin_lock_irqsave(&lag_lock, flags); 2500 ldev = mlx5_lag_dev(dev); 2501 if (ldev) { 2502 u32 filter; 2503 2504 filter = mlx5_lag_get_filter(ldev, dev); 2505 idx = mlx5_lag_get_dev_index_by_seq_filter(ldev, MLX5_LAG_P1, 2506 filter); 2507 if ((__mlx5_lag_is_active(ldev) || 2508 __mlx5_lag_is_sd_active(ldev, dev)) && idx >= 0) { 2509 pf = mlx5_lag_pf(ldev, idx); 2510 res = pf && dev == pf->dev; 2511 } 2512 } 2513 spin_unlock_irqrestore(&lag_lock, flags); 2514 2515 return res; 2516 } 2517 EXPORT_SYMBOL(mlx5_lag_is_master); 2518 2519 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) 2520 { 2521 struct mlx5_lag *ldev; 2522 unsigned long flags; 2523 bool res; 2524 2525 spin_lock_irqsave(&lag_lock, flags); 2526 ldev = mlx5_lag_dev(dev); 2527 res = ldev && __mlx5_lag_is_sriov(ldev); 2528 spin_unlock_irqrestore(&lag_lock, flags); 2529 2530 return res; 2531 } 2532 EXPORT_SYMBOL(mlx5_lag_is_sriov); 2533 2534 bool mlx5_lag_is_sd(struct mlx5_core_dev *dev) 2535 { 2536 struct mlx5_lag *ldev; 2537 unsigned long flags; 2538 bool res; 2539 2540 spin_lock_irqsave(&lag_lock, flags); 2541 ldev = mlx5_lag_dev(dev); 2542 res = ldev && __mlx5_lag_is_sd(ldev, dev); 2543 spin_unlock_irqrestore(&lag_lock, flags); 2544 2545 return res; 2546 } 2547 2548 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) 2549 { 2550 struct mlx5_lag *ldev; 2551 unsigned long flags; 2552 bool res = false; 2553 2554 spin_lock_irqsave(&lag_lock, flags); 2555 ldev = mlx5_lag_dev(dev); 2556 if (ldev) { 2557 res = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, 2558 &ldev->mode_flags); 2559 if (__mlx5_lag_is_sd(ldev, dev) && !__mlx5_lag_is_active(ldev)) 2560 res = __mlx5_lag_is_sd_active(ldev, dev); 2561 } 2562 spin_unlock_irqrestore(&lag_lock, flags); 2563 2564 return res; 2565 } 2566 EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); 2567 2568 void mlx5_lag_disable_change(struct mlx5_core_dev *dev) 2569 { 2570 struct mlx5_devcom_comp_dev *sd_devcom = mlx5_sd_get_devcom(dev); 2571 struct mlx5_core_dev *primary = dev; 2572 struct mlx5_lag *ldev; 2573 struct lag_func *pf; 2574 bool mpesw; 2575 int i; 2576 2577 ldev = mlx5_lag_dev(dev); 2578 if (!ldev) 2579 return; 2580 2581 if (sd_devcom) { 2582 mlx5_devcom_comp_lock(sd_devcom); 2583 primary = mlx5_sd_get_primary(dev) ?: dev; 2584 mlx5_devcom_comp_unlock(sd_devcom); 2585 } 2586 mlx5_devcom_comp_lock(primary->priv.hca_devcom_comp); 2587 mpesw = ldev->mode == MLX5_LAG_MODE_MPESW; 2588 if (mpesw) 2589 mlx5_mpesw_sd_devcoms_lock(ldev); 2590 mutex_lock(&ldev->lock); 2591 2592 ldev->mode_changes_in_progress++; 2593 if (__mlx5_lag_is_active(ldev)) { 2594 if (ldev->mode == MLX5_LAG_MODE_MPESW) 2595 mlx5_lag_disable_mpesw(ldev); 2596 else 2597 mlx5_disable_lag(ldev); 2598 } 2599 2600 mutex_unlock(&ldev->lock); 2601 if (mpesw) 2602 mlx5_mpesw_sd_devcoms_unlock(ldev); 2603 mlx5_devcom_comp_unlock(primary->priv.hca_devcom_comp); 2604 2605 if (!sd_devcom) 2606 return; 2607 2608 /* Teardown SD shared FDB for this device's group if active */ 2609 mlx5_devcom_comp_lock(sd_devcom); 2610 mutex_lock(&ldev->lock); 2611 mlx5_lag_for_each(i, 0, ldev, MLX5_LAG_FILTER_ALL) { 2612 pf = mlx5_lag_pf(ldev, i); 2613 if (pf->dev == dev && pf->sd_fdb_active) { 2614 mlx5_lag_shared_fdb_destroy(ldev, pf->group_id); 2615 break; 2616 } 2617 } 2618 mutex_unlock(&ldev->lock); 2619 mlx5_devcom_comp_unlock(sd_devcom); 2620 } 2621 2622 void mlx5_lag_enable_change(struct mlx5_core_dev *dev) 2623 { 2624 struct mlx5_lag *ldev; 2625 2626 ldev = mlx5_lag_dev(dev); 2627 if (!ldev) 2628 return; 2629 2630 mutex_lock(&ldev->lock); 2631 ldev->mode_changes_in_progress--; 2632 mutex_unlock(&ldev->lock); 2633 mlx5_queue_bond_work(ldev, 0); 2634 } 2635 2636 u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, 2637 struct net_device *slave) 2638 { 2639 struct mlx5_lag *ldev; 2640 unsigned long flags; 2641 struct lag_func *pf; 2642 u8 port = 0; 2643 int i; 2644 2645 spin_lock_irqsave(&lag_lock, flags); 2646 ldev = mlx5_lag_dev(dev); 2647 if (!(ldev && __mlx5_lag_is_roce(ldev))) 2648 goto unlock; 2649 2650 mlx5_ldev_for_each(i, 0, ldev) { 2651 pf = mlx5_lag_pf(ldev, i); 2652 if (pf->netdev == slave) { 2653 port = i; 2654 break; 2655 } 2656 } 2657 2658 port = ldev->v2p_map[port * ldev->buckets]; 2659 2660 unlock: 2661 spin_unlock_irqrestore(&lag_lock, flags); 2662 return port; 2663 } 2664 EXPORT_SYMBOL(mlx5_lag_get_slave_port); 2665 2666 u8 mlx5_lag_get_num_ports(struct mlx5_core_dev *dev) 2667 { 2668 struct mlx5_lag *ldev; 2669 2670 ldev = mlx5_lag_dev(dev); 2671 if (!ldev) 2672 return 0; 2673 2674 return ldev->ports; 2675 } 2676 EXPORT_SYMBOL(mlx5_lag_get_num_ports); 2677 2678 struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int *i) 2679 { 2680 struct mlx5_core_dev *peer_dev = NULL; 2681 struct mlx5_lag *ldev; 2682 unsigned long flags; 2683 struct lag_func *pf; 2684 int idx; 2685 2686 spin_lock_irqsave(&lag_lock, flags); 2687 ldev = mlx5_lag_dev(dev); 2688 if (!ldev) 2689 goto unlock; 2690 2691 if (*i == MLX5_MAX_PORTS) 2692 goto unlock; 2693 mlx5_lag_for_each(idx, *i, ldev, mlx5_lag_get_filter(ldev, dev)) { 2694 pf = mlx5_lag_pf(ldev, idx); 2695 if (pf->dev != dev) 2696 break; 2697 } 2698 2699 if (idx == MLX5_MAX_PORTS) { 2700 *i = idx; 2701 goto unlock; 2702 } 2703 *i = idx + 1; 2704 2705 pf = mlx5_lag_pf(ldev, idx); 2706 peer_dev = pf->dev; 2707 2708 unlock: 2709 spin_unlock_irqrestore(&lag_lock, flags); 2710 return peer_dev; 2711 } 2712 EXPORT_SYMBOL(mlx5_lag_get_next_peer_mdev); 2713 2714 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, 2715 u64 *values, 2716 int num_counters, 2717 size_t *offsets) 2718 { 2719 int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); 2720 struct mlx5_core_dev **mdev; 2721 int ret = 0, i, j, idx = 0; 2722 struct mlx5_lag *ldev; 2723 unsigned long flags; 2724 struct lag_func *pf; 2725 int num_ports; 2726 void *out; 2727 2728 out = kvzalloc(outlen, GFP_KERNEL); 2729 if (!out) 2730 return -ENOMEM; 2731 2732 mdev = kvzalloc(sizeof(mdev[0]) * MLX5_MAX_PORTS, GFP_KERNEL); 2733 if (!mdev) { 2734 ret = -ENOMEM; 2735 goto free_out; 2736 } 2737 2738 memset(values, 0, sizeof(*values) * num_counters); 2739 2740 spin_lock_irqsave(&lag_lock, flags); 2741 ldev = mlx5_lag_dev(dev); 2742 if (ldev && __mlx5_lag_is_active(ldev)) { 2743 num_ports = ldev->ports; 2744 mlx5_ldev_for_each(i, 0, ldev) { 2745 pf = mlx5_lag_pf(ldev, i); 2746 mdev[idx++] = pf->dev; 2747 } 2748 } else { 2749 num_ports = 1; 2750 mdev[MLX5_LAG_P1] = dev; 2751 } 2752 spin_unlock_irqrestore(&lag_lock, flags); 2753 2754 for (i = 0; i < num_ports; ++i) { 2755 u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; 2756 2757 MLX5_SET(query_cong_statistics_in, in, opcode, 2758 MLX5_CMD_OP_QUERY_CONG_STATISTICS); 2759 ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, 2760 out); 2761 if (ret) 2762 goto free_mdev; 2763 2764 for (j = 0; j < num_counters; ++j) 2765 values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); 2766 } 2767 2768 free_mdev: 2769 kvfree(mdev); 2770 free_out: 2771 kvfree(out); 2772 return ret; 2773 } 2774 EXPORT_SYMBOL(mlx5_lag_query_cong_counters); 2775