1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ 3 4 #include "lib/sd.h" 5 #include "../lag/lag.h" 6 #include "mlx5_core.h" 7 #include "lib/mlx5.h" 8 #include "devlink.h" 9 #include "eswitch.h" 10 #include "fs_cmd.h" 11 #include <linux/mlx5/eswitch.h> 12 #include <linux/mlx5/vport.h> 13 #include <linux/debugfs.h> 14 15 #define sd_info(__dev, format, ...) \ 16 dev_info((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__) 17 #define sd_warn(__dev, format, ...) \ 18 dev_warn((__dev)->device, "Socket-Direct: " format, ##__VA_ARGS__) 19 20 struct mlx5_sd { 21 u32 group_id; 22 u8 host_buses; 23 struct mlx5_devcom_comp_dev *devcom; 24 struct dentry *dfs; 25 u8 state; 26 bool primary; 27 bool fw_silents_secondaries; 28 union { 29 struct { /* primary */ 30 struct mlx5_core_dev *secondaries[MLX5_SD_MAX_GROUP_SZ - 1]; 31 struct mlx5_flow_table *tx_ft; 32 /* Next index for secondary registration */ 33 u8 next_secondary_idx; 34 }; 35 struct { /* secondary */ 36 struct mlx5_core_dev *primary_dev; 37 u32 alias_obj_id; 38 /* TX flow table root in switchdev (silent) config */ 39 bool tx_root_silent; 40 }; 41 }; 42 }; 43 44 enum mlx5_sd_state { 45 MLX5_SD_STATE_DOWN = 0, 46 MLX5_SD_STATE_UP, 47 }; 48 49 static int mlx5_sd_get_host_buses(struct mlx5_core_dev *dev) 50 { 51 struct mlx5_sd *sd = mlx5_get_sd(dev); 52 53 if (!sd) 54 return 1; 55 56 return sd->host_buses; 57 } 58 59 struct mlx5_core_dev *mlx5_sd_get_primary(struct mlx5_core_dev *dev) 60 { 61 struct mlx5_sd *sd = mlx5_get_sd(dev); 62 63 if (!sd) 64 return dev; 65 66 if (!mlx5_devcom_comp_is_ready(sd->devcom)) 67 return NULL; 68 69 return sd->primary ? dev : sd->primary_dev; 70 } 71 72 struct mlx5_devcom_comp_dev *mlx5_sd_get_devcom(struct mlx5_core_dev *dev) 73 { 74 struct mlx5_sd *sd = mlx5_get_sd(dev); 75 76 if (!sd) 77 return NULL; 78 79 return sd->devcom; 80 } 81 82 bool mlx5_sd_is_primary(struct mlx5_core_dev *dev) 83 { 84 struct mlx5_sd *sd = mlx5_get_sd(dev); 85 86 if (!sd) 87 return true; 88 89 return sd->primary; 90 } 91 92 int mlx5_sd_pf_num_get(struct mlx5_core_dev *dev) 93 { 94 struct mlx5_sd *sd = mlx5_get_sd(dev); 95 int pf_num = mlx5_get_dev_index(dev); 96 struct mlx5_core_dev *pos; 97 int i; 98 99 if (!sd) 100 return pf_num; 101 102 mlx5_devcom_comp_assert_locked(sd->devcom); 103 if (!mlx5_devcom_comp_is_ready(sd->devcom)) 104 return -ENODEV; 105 106 mlx5_sd_for_each_dev(i, mlx5_sd_get_primary(dev), pos) 107 if (pos == dev) 108 break; 109 110 return pf_num * sd->host_buses + i; 111 } 112 113 struct mlx5_core_dev * 114 mlx5_sd_primary_get_peer(struct mlx5_core_dev *primary, int idx) 115 { 116 struct mlx5_sd *sd; 117 118 if (idx == 0) 119 return primary; 120 121 if (idx >= mlx5_sd_get_host_buses(primary)) 122 return NULL; 123 124 sd = mlx5_get_sd(primary); 125 return sd->secondaries[idx - 1]; 126 } 127 128 int mlx5_sd_ch_ix_get_dev_ix(struct mlx5_core_dev *dev, int ch_ix) 129 { 130 if (is_mdev_switchdev_mode(dev)) 131 return 0; 132 133 return ch_ix % mlx5_sd_get_host_buses(dev); 134 } 135 136 int mlx5_sd_ch_ix_get_vec_ix(struct mlx5_core_dev *dev, int ch_ix) 137 { 138 if (is_mdev_switchdev_mode(dev)) 139 return ch_ix; 140 141 return ch_ix / mlx5_sd_get_host_buses(dev); 142 } 143 144 struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int ch_ix) 145 { 146 int mdev_idx = mlx5_sd_ch_ix_get_dev_ix(primary, ch_ix); 147 148 return mlx5_sd_primary_get_peer(primary, mdev_idx); 149 } 150 151 static bool ft_create_alias_supported(struct mlx5_core_dev *dev) 152 { 153 u64 obj_allowed = MLX5_CAP_GEN_2_64(dev, allowed_object_for_other_vhca_access); 154 u32 obj_supp = MLX5_CAP_GEN_2(dev, cross_vhca_object_to_object_supported); 155 156 if (!(obj_supp & 157 MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE)) 158 return false; 159 160 if (!(obj_allowed & MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE)) 161 return false; 162 163 return true; 164 } 165 166 static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, 167 u8 *host_buses) 168 { 169 u32 out[MLX5_ST_SZ_DW(mpir_reg)]; 170 int err; 171 172 err = mlx5_query_mpir_reg(dev, out); 173 if (err) 174 return err; 175 176 *sdm = MLX5_GET(mpir_reg, out, sdm); 177 *host_buses = MLX5_GET(mpir_reg, out, host_buses); 178 179 return 0; 180 } 181 182 static u32 mlx5_sd_group_id(struct mlx5_core_dev *dev, u8 sd_group) 183 { 184 return (u32)((MLX5_CAP_GEN(dev, native_port_num) << 8) | sd_group); 185 } 186 187 static bool mlx5_sd_caps_supported(struct mlx5_core_dev *dev, u8 host_buses) 188 { 189 /* Honor the SW implementation limit */ 190 if (host_buses > MLX5_SD_MAX_GROUP_SZ) 191 return false; 192 193 /* Disconnect secondaries from the network */ 194 if (!MLX5_CAP_GEN(dev, eswitch_manager)) 195 return false; 196 if (!MLX5_CAP_GEN(dev, silent_mode_set) && 197 !MLX5_CAP_GEN(dev, silent_mode_query)) 198 return false; 199 200 /* RX steering from primary to secondaries */ 201 if (!MLX5_CAP_GEN(dev, cross_vhca_rqt)) 202 return false; 203 if (host_buses > MLX5_CAP_GEN_2(dev, max_rqt_vhca_id)) 204 return false; 205 206 /* TX steering from secondaries to primary */ 207 if (!ft_create_alias_supported(dev)) 208 return false; 209 if (!MLX5_CAP_FLOWTABLE_NIC_TX(dev, reset_root_to_default)) 210 return false; 211 212 return true; 213 } 214 215 bool mlx5_sd_is_supported(struct mlx5_core_dev *dev) 216 { 217 u8 host_buses, sd_group; 218 bool sdm; 219 int err; 220 221 /* Feature is currently implemented for PFs only */ 222 if (!mlx5_core_is_pf(dev)) 223 return false; 224 225 err = mlx5_query_nic_vport_sd_group(dev, &sd_group); 226 if (err || !sd_group) 227 return false; 228 229 if (!MLX5_CAP_MCAM_REG(dev, mpir)) 230 return false; 231 232 err = mlx5_query_sd(dev, &sdm, &host_buses); 233 if (err || !sdm) 234 return false; 235 236 return mlx5_sd_caps_supported(dev, host_buses); 237 } 238 239 static int sd_init(struct mlx5_core_dev *dev) 240 { 241 u8 host_buses, sd_group; 242 struct mlx5_sd *sd; 243 u32 group_id; 244 bool sdm; 245 int err; 246 247 /* Feature is currently implemented for PFs only */ 248 if (!mlx5_core_is_pf(dev)) 249 return 0; 250 251 err = mlx5_query_nic_vport_sd_group(dev, &sd_group); 252 if (err) 253 return err; 254 255 if (!sd_group) 256 return 0; 257 258 if (!MLX5_CAP_MCAM_REG(dev, mpir)) 259 return 0; 260 261 err = mlx5_query_sd(dev, &sdm, &host_buses); 262 if (err) 263 return err; 264 265 if (!sdm) 266 return 0; 267 268 group_id = mlx5_sd_group_id(dev, sd_group); 269 270 if (!mlx5_sd_caps_supported(dev, host_buses)) { 271 sd_warn(dev, "can't support requested netdev combining for group id 0x%x, skipping\n", 272 group_id); 273 return 0; 274 } 275 276 sd = kzalloc_obj(*sd); 277 if (!sd) 278 return -ENOMEM; 279 280 sd->host_buses = host_buses; 281 sd->group_id = group_id; 282 283 mlx5_set_sd(dev, sd); 284 285 return 0; 286 } 287 288 static void sd_cleanup(struct mlx5_core_dev *dev) 289 { 290 struct mlx5_sd *sd = mlx5_get_sd(dev); 291 292 mlx5_set_sd(dev, NULL); 293 kfree(sd); 294 } 295 296 static int sd_lag_state_show(struct seq_file *file, void *priv) 297 { 298 struct mlx5_core_dev *dev = file->private; 299 struct mlx5_lag *ldev; 300 struct lag_func *pf; 301 bool active = false; 302 int i; 303 304 ldev = mlx5_lag_dev(dev); 305 if (!ldev) 306 return -EINVAL; 307 308 mutex_lock(&ldev->lock); 309 mlx5_ldev_for_each(i, 0, ldev) { 310 pf = mlx5_lag_pf(ldev, i); 311 if (pf->dev == dev) { 312 active = pf->sd_fdb_active; 313 break; 314 } 315 } 316 mutex_unlock(&ldev->lock); 317 318 seq_printf(file, "%s\n", active ? "active" : "disabled"); 319 return 0; 320 } 321 322 DEFINE_SHOW_ATTRIBUTE(sd_lag_state); 323 324 /* SD LAG integration is optional. If LAG isn't available on this device 325 * (e.g. lag caps are off), or registering secondaries fails, just warn 326 * and continue - SD can operate without the LAG-side bookkeeping. 327 */ 328 static void sd_lag_init(struct mlx5_core_dev *dev) 329 { 330 struct mlx5_core_dev *primary = mlx5_sd_get_primary(dev); 331 struct mlx5_sd *sd = mlx5_get_sd(primary); 332 struct mlx5_core_dev *pos, *to; 333 struct mlx5_lag *ldev; 334 struct lag_func *pf; 335 int err; 336 int i; 337 338 ldev = mlx5_lag_dev(primary); 339 if (!ldev) { 340 sd_warn(primary, "%s: no ldev (LAG caps off?), skipping\n", 341 __func__); 342 return; 343 } 344 345 mutex_lock(&ldev->lock); 346 pf = mlx5_lag_pf_by_dev(ldev, primary); 347 if (!pf) { 348 sd_warn(primary, "%s: primary not registered in ldev, skipping\n", 349 __func__); 350 goto out; 351 } 352 353 pf->group_id = sd->group_id; 354 355 mlx5_sd_for_each_secondary(i, primary, pos) { 356 err = mlx5_ldev_add_mdev(ldev, pos, sd->group_id); 357 if (err) { 358 sd_warn(primary, "%s: failed to add secondary %s to ldev: %d\n", 359 __func__, dev_name(pos->device), err); 360 goto err; 361 } 362 } 363 364 out: 365 mutex_unlock(&ldev->lock); 366 return; 367 368 err: 369 to = pos; 370 mlx5_sd_for_each_secondary_to(i, primary, to, pos) 371 mlx5_ldev_remove_mdev(ldev, pos); 372 pf->group_id = 0; 373 mutex_unlock(&ldev->lock); 374 } 375 376 static void sd_lag_cleanup(struct mlx5_core_dev *dev) 377 { 378 struct mlx5_core_dev *primary = mlx5_sd_get_primary(dev); 379 struct mlx5_core_dev *pos; 380 struct mlx5_lag *ldev; 381 struct lag_func *pf; 382 int i; 383 384 ldev = mlx5_lag_dev(primary); 385 if (!ldev) 386 return; 387 388 mutex_lock(&ldev->lock); 389 mlx5_sd_for_each_secondary(i, primary, pos) 390 mlx5_ldev_remove_mdev(ldev, pos); 391 392 pf = mlx5_lag_pf_by_dev(ldev, primary); 393 if (pf) 394 pf->group_id = 0; 395 mutex_unlock(&ldev->lock); 396 } 397 398 enum { 399 SD_PRIMARY_SET, 400 SD_SECONDARIES_SET, 401 SD_FW_SILENT_CHECK, 402 }; 403 404 static int sd_handle_fw_silent_check(struct mlx5_core_dev *dev, 405 struct mlx5_core_dev *peer) 406 { 407 struct mlx5_sd *peer_sd = mlx5_get_sd(peer); 408 struct mlx5_sd *sd = mlx5_get_sd(dev); 409 u8 dev_silent = 0, peer_silent = 0; 410 int err; 411 412 if (peer_sd->fw_silents_secondaries) { 413 sd->fw_silents_secondaries = true; 414 return 0; 415 } 416 417 err = mlx5_fs_cmd_query_l2table_silent(dev, &dev_silent); 418 if (err) { 419 sd_warn(dev, "Failed to query silent mode for dev: %d\n", err); 420 return err; 421 } 422 423 err = mlx5_fs_cmd_query_l2table_silent(peer, &peer_silent); 424 if (err) { 425 sd_warn(dev, "Failed to query silent mode for peer: %d\n", err); 426 return err; 427 } 428 429 if (dev_silent || peer_silent) { 430 sd->fw_silents_secondaries = true; 431 peer_sd->fw_silents_secondaries = true; 432 sd_info(dev, "FW indicates at least one device is silent\n"); 433 } 434 return 0; 435 } 436 437 static int sd_handle_primary_set(struct mlx5_core_dev *dev, 438 struct mlx5_core_dev *peer) 439 { 440 struct mlx5_sd *peer_sd = mlx5_get_sd(peer); 441 struct mlx5_sd *sd = mlx5_get_sd(dev); 442 struct mlx5_core_dev *candidate; 443 struct mlx5_sd *candidate_sd; 444 bool dev_should_be_primary; 445 446 /* Peer is the device that being sent to all the other devices in the 447 * group. Hence, use peer to get the candidate device. 448 */ 449 candidate = peer_sd->primary ? peer : peer_sd->primary_dev; 450 451 if (sd->fw_silents_secondaries) { 452 u8 candidate_silent = 0; 453 int err; 454 455 err = mlx5_fs_cmd_query_l2table_silent(candidate, 456 &candidate_silent); 457 if (err) { 458 sd_warn(candidate, "Failed to query silent mode for dev: %d\n", 459 err); 460 return err; 461 } 462 /* Candidate is silent, dev should be primary */ 463 dev_should_be_primary = candidate_silent; 464 } else { 465 /* No FW silent mode, use bus number */ 466 dev_should_be_primary = 467 dev->pdev->bus->number < candidate->pdev->bus->number; 468 } 469 470 if (!dev_should_be_primary) 471 return 0; 472 473 candidate_sd = mlx5_get_sd(candidate); 474 475 sd->primary = true; 476 candidate_sd->primary = false; 477 candidate_sd->primary_dev = dev; 478 peer_sd->primary = false; 479 peer_sd->primary_dev = dev; 480 return 0; 481 } 482 483 static void sd_handle_secondaries_set(struct mlx5_core_dev *dev, 484 struct mlx5_core_dev *peer) 485 { 486 struct mlx5_sd *peer_sd = mlx5_get_sd(peer); 487 struct mlx5_sd *sd = mlx5_get_sd(dev); 488 u8 idx; 489 490 /* Primary has nothing to register with itself. */ 491 if (sd->primary) 492 return; 493 494 /* dev is a secondary device, peer is the primary device. 495 * Secondary registers itself with the primary. 496 */ 497 idx = peer_sd->next_secondary_idx++; 498 peer_sd->secondaries[idx] = dev; 499 sd->primary_dev = peer; 500 } 501 502 static int mlx5_sd_devcom_event(int event, void *my_data, void *event_data) 503 { 504 struct mlx5_core_dev *peer = event_data; 505 struct mlx5_core_dev *dev = my_data; 506 507 switch (event) { 508 case SD_FW_SILENT_CHECK: 509 return sd_handle_fw_silent_check(dev, peer); 510 case SD_PRIMARY_SET: 511 return sd_handle_primary_set(dev, peer); 512 case SD_SECONDARIES_SET: 513 sd_handle_secondaries_set(dev, peer); 514 return 0; 515 } 516 517 return 0; 518 } 519 520 static int sd_register(struct mlx5_core_dev *dev) 521 { 522 struct mlx5_devcom_match_attr attr = {}; 523 struct mlx5_devcom_comp_dev *devcom; 524 struct mlx5_core_dev *primary; 525 struct mlx5_sd *primary_sd; 526 struct mlx5_sd *sd; 527 int err; 528 529 sd = mlx5_get_sd(dev); 530 attr.key.val = sd->group_id; 531 attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS; 532 attr.net = mlx5_core_net(dev); 533 devcom = mlx5_devcom_register_component(dev->priv.devc, 534 MLX5_DEVCOM_SD_GROUP, 535 &attr, mlx5_sd_devcom_event, 536 dev); 537 if (!devcom) 538 return -EINVAL; 539 540 sd->devcom = devcom; 541 542 mlx5_devcom_comp_lock(devcom); 543 if (mlx5_devcom_comp_get_size(devcom) != sd->host_buses || 544 mlx5_devcom_comp_is_ready(devcom)) 545 goto out; 546 547 /* If silent mode query is supported, ask each device whether it is 548 * silent and propagate the result to the whole group. In each group 549 * only one device is not silent 550 */ 551 if (MLX5_CAP_GEN(dev, silent_mode_query)) { 552 err = mlx5_devcom_locked_send_event(devcom, SD_FW_SILENT_CHECK, 553 SD_FW_SILENT_CHECK, dev); 554 if (err) 555 goto err_devcom_unreg; 556 } 557 558 /* Send SD_PRIMARY_SET event with this device. 559 * All peers will receive this event and compare to this device. 560 * If fw_silents_secondaries is set, choose non-silent device. 561 * Otherwise use bus number. 562 */ 563 sd->primary = true; 564 err = mlx5_devcom_locked_send_event(devcom, SD_PRIMARY_SET, 565 SD_PRIMARY_SET, dev); 566 if (err) 567 goto err_devcom_unreg; 568 569 /* Broadcast SD_SECONDARIES_SET. Each non-sender peer's handler runs; 570 * the primary's handler returns early so only secondaries register. 571 */ 572 primary = sd->primary ? dev : sd->primary_dev; 573 if (!sd->primary) 574 sd_handle_secondaries_set(dev, primary); 575 mlx5_devcom_locked_send_event(devcom, SD_SECONDARIES_SET, 576 DEVCOM_CANT_FAIL, primary); 577 578 primary_sd = mlx5_get_sd(primary); 579 if (primary_sd->next_secondary_idx + 1 == sd->host_buses) 580 mlx5_devcom_comp_set_ready(devcom, true); 581 out: 582 mlx5_devcom_comp_unlock(devcom); 583 return 0; 584 585 err_devcom_unreg: 586 mlx5_devcom_comp_unlock(devcom); 587 mlx5_devcom_unregister_component(devcom); 588 return err; 589 } 590 591 static void sd_unregister(struct mlx5_core_dev *dev) 592 { 593 struct mlx5_sd *sd = mlx5_get_sd(dev); 594 595 mlx5_devcom_unregister_component(sd->devcom); 596 } 597 598 static int sd_cmd_set_primary(struct mlx5_core_dev *primary, u8 *alias_key) 599 { 600 struct mlx5_cmd_allow_other_vhca_access_attr allow_attr = {}; 601 struct mlx5_sd *sd = mlx5_get_sd(primary); 602 struct mlx5_flow_table_attr ft_attr = {}; 603 struct mlx5_flow_namespace *nic_ns; 604 struct mlx5_flow_table *ft; 605 int err; 606 607 nic_ns = mlx5_get_flow_namespace(primary, MLX5_FLOW_NAMESPACE_EGRESS); 608 if (!nic_ns) 609 return -EOPNOTSUPP; 610 611 ft = mlx5_create_flow_table(nic_ns, &ft_attr); 612 if (IS_ERR(ft)) { 613 err = PTR_ERR(ft); 614 return err; 615 } 616 sd->tx_ft = ft; 617 memcpy(allow_attr.access_key, alias_key, ACCESS_KEY_LEN); 618 allow_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS; 619 allow_attr.obj_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id; 620 621 err = mlx5_cmd_allow_other_vhca_access(primary, &allow_attr); 622 if (err) { 623 mlx5_core_err(primary, "Failed to allow other vhca access err=%d\n", 624 err); 625 mlx5_destroy_flow_table(ft); 626 return err; 627 } 628 629 return 0; 630 } 631 632 static void sd_cmd_unset_primary(struct mlx5_core_dev *primary) 633 { 634 struct mlx5_sd *sd = mlx5_get_sd(primary); 635 636 mlx5_destroy_flow_table(sd->tx_ft); 637 } 638 639 static int sd_secondary_create_alias_ft(struct mlx5_core_dev *secondary, 640 struct mlx5_core_dev *primary, 641 struct mlx5_flow_table *ft, 642 u32 *obj_id, u8 *alias_key) 643 { 644 u32 aliased_object_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id; 645 u16 vhca_id_to_be_accessed = MLX5_CAP_GEN(primary, vhca_id); 646 struct mlx5_cmd_alias_obj_create_attr alias_attr = {}; 647 int ret; 648 649 memcpy(alias_attr.access_key, alias_key, ACCESS_KEY_LEN); 650 alias_attr.obj_id = aliased_object_id; 651 alias_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS; 652 alias_attr.vhca_id = vhca_id_to_be_accessed; 653 ret = mlx5_cmd_alias_obj_create(secondary, &alias_attr, obj_id); 654 if (ret) { 655 mlx5_core_err(secondary, "Failed to create alias object err=%d\n", 656 ret); 657 return ret; 658 } 659 660 return 0; 661 } 662 663 static void sd_secondary_destroy_alias_ft(struct mlx5_core_dev *secondary) 664 { 665 struct mlx5_sd *sd = mlx5_get_sd(secondary); 666 667 mlx5_cmd_alias_obj_destroy(secondary, sd->alias_obj_id, 668 MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS); 669 } 670 671 static int mlx5_sd_secondary_conf_tx_root(struct mlx5_core_dev *secondary, 672 bool disconnect) 673 { 674 struct mlx5_sd *sd = mlx5_get_sd(secondary); 675 int err; 676 677 /* Idempotent: skip if TX root is already in the requested state. */ 678 if (sd->tx_root_silent == disconnect) 679 return 0; 680 681 if (disconnect) 682 err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true); 683 else 684 err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, 685 sd->alias_obj_id, 686 false); 687 if (err) 688 return err; 689 690 sd->tx_root_silent = disconnect; 691 return 0; 692 } 693 694 static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary, 695 struct mlx5_core_dev *primary, 696 u8 *alias_key) 697 { 698 struct mlx5_sd *primary_sd = mlx5_get_sd(primary); 699 struct mlx5_sd *sd = mlx5_get_sd(secondary); 700 int err; 701 702 if (!primary_sd->fw_silents_secondaries) { 703 err = mlx5_fs_cmd_set_l2table_entry_silent(secondary, 1); 704 if (err) 705 return err; 706 } 707 708 err = sd_secondary_create_alias_ft(secondary, primary, primary_sd->tx_ft, 709 &sd->alias_obj_id, alias_key); 710 if (err) 711 goto err_unset_silent; 712 713 err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, sd->alias_obj_id, 714 false); 715 if (err) 716 goto err_destroy_alias_ft; 717 sd->tx_root_silent = false; 718 719 return 0; 720 721 err_destroy_alias_ft: 722 sd_secondary_destroy_alias_ft(secondary); 723 err_unset_silent: 724 if (!primary_sd->fw_silents_secondaries) 725 mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0); 726 return err; 727 } 728 729 static void sd_cmd_unset_secondary(struct mlx5_core_dev *secondary) 730 { 731 struct mlx5_sd *primary_sd; 732 733 primary_sd = mlx5_get_sd(mlx5_sd_get_primary(secondary)); 734 mlx5_sd_secondary_conf_tx_root(secondary, true); 735 sd_secondary_destroy_alias_ft(secondary); 736 if (!primary_sd->fw_silents_secondaries) 737 mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0); 738 } 739 740 static void sd_print_group(struct mlx5_core_dev *primary) 741 { 742 struct mlx5_sd *sd = mlx5_get_sd(primary); 743 struct mlx5_core_dev *pos; 744 int i; 745 746 sd_info(primary, "group id %#x, primary %s, vhca %#x\n", 747 sd->group_id, pci_name(primary->pdev), 748 MLX5_CAP_GEN(primary, vhca_id)); 749 mlx5_sd_for_each_secondary(i, primary, pos) 750 sd_info(primary, "group id %#x, secondary_%d %s, vhca %#x\n", 751 sd->group_id, i - 1, pci_name(pos->pdev), 752 MLX5_CAP_GEN(pos, vhca_id)); 753 } 754 755 static ssize_t dev_read(struct file *filp, char __user *buf, size_t count, 756 loff_t *pos) 757 { 758 struct mlx5_core_dev *dev; 759 char tbuf[32]; 760 int ret; 761 762 dev = filp->private_data; 763 ret = snprintf(tbuf, sizeof(tbuf), "%s vhca %#x\n", pci_name(dev->pdev), 764 MLX5_CAP_GEN(dev, vhca_id)); 765 766 return simple_read_from_buffer(buf, count, pos, tbuf, ret); 767 } 768 769 static const struct file_operations dev_fops = { 770 .owner = THIS_MODULE, 771 .open = simple_open, 772 .read = dev_read, 773 }; 774 775 int mlx5_sd_init(struct mlx5_core_dev *dev) 776 { 777 struct mlx5_core_dev *primary, *pos, *to; 778 struct mlx5_sd *sd = mlx5_get_sd(dev); 779 u8 alias_key[ACCESS_KEY_LEN]; 780 struct mlx5_sd *primary_sd; 781 int err, i; 782 783 err = sd_init(dev); 784 if (err) 785 return err; 786 787 sd = mlx5_get_sd(dev); 788 if (!sd) 789 return 0; 790 791 err = sd_register(dev); 792 if (err) 793 goto err_sd_cleanup; 794 795 mlx5_devcom_comp_lock(sd->devcom); 796 if (!mlx5_devcom_comp_is_ready(sd->devcom)) 797 goto out; 798 799 primary = mlx5_sd_get_primary(dev); 800 if (!primary) 801 goto out; 802 803 primary_sd = mlx5_get_sd(primary); 804 if (primary_sd->state != MLX5_SD_STATE_DOWN) 805 goto out; 806 807 for (i = 0; i < ACCESS_KEY_LEN; i++) 808 alias_key[i] = get_random_u8(); 809 810 err = sd_cmd_set_primary(primary, alias_key); 811 if (err) 812 goto err_sd_unregister; 813 814 mlx5_sd_for_each_secondary(i, primary, pos) { 815 err = sd_cmd_set_secondary(pos, primary, alias_key); 816 if (err) 817 goto err_unset_secondaries; 818 } 819 820 sd_lag_init(primary); 821 822 primary_sd->dfs = 823 debugfs_create_dir("multi-pf", 824 mlx5_debugfs_get_dev_root(primary)); 825 mlx5_sd_for_each_secondary(i, primary, pos) { 826 char name[32]; 827 828 snprintf(name, sizeof(name), "secondary_%d", i - 1); 829 debugfs_create_file(name, 0400, primary_sd->dfs, pos, 830 &dev_fops); 831 } 832 833 debugfs_create_file("sd_lag_state", 0400, primary_sd->dfs, primary, 834 &sd_lag_state_fops); 835 debugfs_create_x32("group_id", 0400, primary_sd->dfs, 836 &primary_sd->group_id); 837 debugfs_create_file("primary", 0400, primary_sd->dfs, primary, 838 &dev_fops); 839 840 sd_info(primary, "group id %#x, size %d, combined\n", 841 sd->group_id, mlx5_devcom_comp_get_size(sd->devcom)); 842 sd_print_group(primary); 843 844 primary_sd->state = MLX5_SD_STATE_UP; 845 out: 846 mlx5_devcom_comp_unlock(sd->devcom); 847 return 0; 848 849 err_unset_secondaries: 850 to = pos; 851 mlx5_sd_for_each_secondary_to(i, primary, to, pos) 852 sd_cmd_unset_secondary(pos); 853 sd_cmd_unset_primary(primary); 854 err_sd_unregister: 855 mlx5_sd_for_each_secondary(i, primary, pos) { 856 struct mlx5_sd *peer_sd = mlx5_get_sd(pos); 857 858 primary_sd->secondaries[i - 1] = NULL; 859 peer_sd->primary_dev = NULL; 860 } 861 primary_sd->primary = false; 862 primary_sd->next_secondary_idx = 0; 863 mlx5_devcom_comp_set_ready(sd->devcom, false); 864 mlx5_devcom_comp_unlock(sd->devcom); 865 sd_unregister(dev); 866 err_sd_cleanup: 867 sd_cleanup(dev); 868 return err; 869 } 870 871 void mlx5_sd_cleanup(struct mlx5_core_dev *dev) 872 { 873 struct mlx5_sd *sd = mlx5_get_sd(dev); 874 struct mlx5_core_dev *primary, *pos; 875 struct mlx5_sd *primary_sd; 876 int i; 877 878 if (!sd) 879 return; 880 881 mlx5_devcom_comp_lock(sd->devcom); 882 if (!mlx5_devcom_comp_is_ready(sd->devcom)) 883 goto out_unlock; 884 885 primary = mlx5_sd_get_primary(dev); 886 if (!primary) 887 goto out_ready_false; 888 889 primary_sd = mlx5_get_sd(primary); 890 if (primary_sd->state != MLX5_SD_STATE_UP) 891 goto out_clear_peers; 892 893 debugfs_remove_recursive(primary_sd->dfs); 894 primary_sd->dfs = NULL; 895 sd_lag_cleanup(primary); 896 mlx5_sd_for_each_secondary(i, primary, pos) 897 sd_cmd_unset_secondary(pos); 898 sd_cmd_unset_primary(primary); 899 900 sd_info(primary, "group id %#x, uncombined\n", sd->group_id); 901 primary_sd->state = MLX5_SD_STATE_DOWN; 902 out_clear_peers: 903 mlx5_sd_for_each_secondary(i, primary, pos) { 904 struct mlx5_sd *peer_sd = mlx5_get_sd(pos); 905 906 primary_sd->secondaries[i - 1] = NULL; 907 peer_sd->primary_dev = NULL; 908 } 909 primary_sd->primary = false; 910 primary_sd->next_secondary_idx = 0; 911 out_ready_false: 912 mlx5_devcom_comp_set_ready(sd->devcom, false); 913 out_unlock: 914 mlx5_devcom_comp_unlock(sd->devcom); 915 sd_unregister(dev); 916 sd_cleanup(dev); 917 } 918 919 /* Lock order: 920 * primary: actual_adev_lock -> SD devcom comp lock 921 * secondary: SD devcom comp lock -> (drop) -> actual_adev_lock 922 * The two locks are never held together, so no ABBA. 923 */ 924 struct auxiliary_device *mlx5_sd_get_adev(struct mlx5_core_dev *dev, 925 struct auxiliary_device *adev, 926 int idx) 927 { 928 struct mlx5_sd *sd = mlx5_get_sd(dev); 929 struct mlx5_core_dev *primary; 930 struct mlx5_adev *primary_adev; 931 932 if (!sd) 933 return adev; 934 935 mlx5_devcom_comp_lock(sd->devcom); 936 if (!mlx5_devcom_comp_is_ready(sd->devcom)) { 937 mlx5_devcom_comp_unlock(sd->devcom); 938 return NULL; 939 } 940 941 primary = mlx5_sd_get_primary(dev); 942 if (!primary || dev == primary) { 943 mlx5_devcom_comp_unlock(sd->devcom); 944 return adev; 945 } 946 947 primary_adev = primary->priv.adev[idx]; 948 get_device(&primary_adev->adev.dev); 949 mlx5_devcom_comp_unlock(sd->devcom); 950 951 device_lock(&primary_adev->adev.dev); 952 /* Primary may have completed remove between dropping devcom and 953 * acquiring device_lock; recheck. 954 */ 955 if (!mlx5_devcom_comp_is_ready(sd->devcom)) { 956 device_unlock(&primary_adev->adev.dev); 957 put_device(&primary_adev->adev.dev); 958 return NULL; 959 } 960 return &primary_adev->adev; 961 } 962 963 #ifdef CONFIG_MLX5_ESWITCH 964 /* All SD members must have completed esw_offloads_enable (i.e., reached 965 * mlx5_esw_offloads_devcom_init) and become eswitch-peers of the primary. 966 * Until then, mlx5_eswitch_is_peer() returns false for the not-yet-paired 967 * member and shared_fdb_supported_filter would reject. When all PFs transition 968 * in parallel, only the last one to finish satisfies this gate; the earlier 969 * ones return 0 silently here. 970 */ 971 static bool mlx5_sd_all_paired(struct mlx5_core_dev *primary) 972 { 973 struct mlx5_eswitch *primary_esw = primary->priv.eswitch; 974 struct mlx5_core_dev *pos; 975 int i; 976 977 mlx5_sd_for_each_secondary(i, primary, pos) { 978 if (!mlx5_eswitch_is_peer(primary_esw, pos->priv.eswitch)) 979 return false; 980 } 981 return true; 982 } 983 984 static void mlx5_sd_activate_shared_fdb(struct mlx5_core_dev *primary) 985 { 986 struct mlx5_sd *sd = mlx5_get_sd(primary); 987 struct mlx5_core_dev *pos; 988 struct mlx5_lag *ldev; 989 struct lag_func *pf; 990 int err; 991 int i; 992 993 ldev = mlx5_lag_dev(primary); 994 if (!ldev) { 995 sd_warn(primary, "Shared FDB MUST have ldev\n"); 996 return; 997 } 998 999 mutex_lock(&ldev->lock); 1000 1001 if (ldev->mode_changes_in_progress) 1002 goto unlock; 1003 1004 if (!mlx5_sd_all_paired(primary)) 1005 goto unlock; 1006 1007 /* Check if SD FDB is already active for this group */ 1008 mlx5_lag_for_each(i, 0, ldev, sd->group_id) { 1009 pf = mlx5_lag_pf(ldev, i); 1010 if (pf->sd_fdb_active) 1011 goto unlock; 1012 break; 1013 } 1014 1015 if (!mlx5_lag_shared_fdb_supported_filter(ldev, sd->group_id)) { 1016 sd_warn(primary, "Shared FDB not supported\n"); 1017 goto unlock; 1018 } 1019 1020 /* Initialize vport metadata for all group devices. This is deferred 1021 * from esw_offloads_enable() because mlx5_sd_pf_num_get() requires 1022 * the SD group to be ready. 1023 */ 1024 mlx5_sd_for_each_dev(i, primary, pos) { 1025 struct mlx5_eswitch *esw = pos->priv.eswitch; 1026 1027 err = mlx5_esw_offloads_init_deferred_metadata(esw); 1028 if (err) { 1029 sd_warn(primary, "Failed to init metadata for %s: %d\n", 1030 dev_name(pos->device), err); 1031 goto unlock; 1032 } 1033 } 1034 1035 err = mlx5_lag_shared_fdb_create(ldev, NULL, 0, sd->group_id); 1036 if (err) 1037 sd_warn(primary, "Failed to create shared FDB: %d\n", err); 1038 else 1039 sd_info(primary, "Shared FDB created\n"); 1040 1041 unlock: 1042 mutex_unlock(&ldev->lock); 1043 } 1044 1045 void mlx5_sd_eswitch_mode_set(struct mlx5_core_dev *dev, u16 mlx5_mode) 1046 { 1047 struct mlx5_core_dev *primary; 1048 struct mlx5_sd *sd; 1049 int err; 1050 1051 sd = mlx5_get_sd(dev); 1052 if (!sd || !mlx5_devcom_comp_is_ready(sd->devcom)) 1053 return; 1054 1055 mlx5_devcom_comp_lock(sd->devcom); 1056 if (!mlx5_devcom_comp_is_ready(sd->devcom)) 1057 goto unlock; 1058 1059 primary = mlx5_sd_get_primary(dev); 1060 1061 /* Secondary devices need TX root reconfiguration */ 1062 if (dev != primary) { 1063 bool disconnect = (mlx5_mode == MLX5_ESWITCH_OFFLOADS); 1064 1065 err = mlx5_sd_secondary_conf_tx_root(dev, disconnect); 1066 if (err) { 1067 sd_warn(dev, "Failed to set TX root: %d\n", err); 1068 goto unlock; 1069 } 1070 } 1071 1072 /* Try to activate shared FDB when all devices are in switchdev. 1073 * Shared FDB is optional - failure here doesn't fail the transition. 1074 */ 1075 if (mlx5_mode == MLX5_ESWITCH_OFFLOADS) 1076 mlx5_sd_activate_shared_fdb(primary); 1077 1078 unlock: 1079 mlx5_devcom_comp_unlock(sd->devcom); 1080 } 1081 1082 #endif /* CONFIG_MLX5_ESWITCH */ 1083 1084 void mlx5_sd_put_adev(struct auxiliary_device *actual_adev, 1085 struct auxiliary_device *adev) 1086 { 1087 if (actual_adev != adev) { 1088 device_unlock(&actual_adev->dev); 1089 put_device(&actual_adev->dev); 1090 } 1091 } 1092