1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 
31 */ 32 33 #include <linux/highmem.h> 34 #include <linux/module.h> 35 #include <linux/init.h> 36 #include <linux/errno.h> 37 #include <linux/pci.h> 38 #include <linux/dma-mapping.h> 39 #include <linux/slab.h> 40 #include <linux/interrupt.h> 41 #include <linux/delay.h> 42 #include <linux/mlx5/driver.h> 43 #include <linux/mlx5/cq.h> 44 #include <linux/mlx5/qp.h> 45 #include <linux/debugfs.h> 46 #include <linux/kmod.h> 47 #include <linux/mlx5/mlx5_ifc.h> 48 #include <linux/mlx5/vport.h> 49 #include <linux/version.h> 50 #include <net/devlink.h> 51 #include "mlx5_core.h" 52 #include "lib/eq.h" 53 #include "fs_core.h" 54 #include "lib/mpfs.h" 55 #include "eswitch.h" 56 #include "devlink.h" 57 #include "fw_reset.h" 58 #include "lib/mlx5.h" 59 #include "lib/tout.h" 60 #include "fpga/core.h" 61 #include "en_accel/ipsec.h" 62 #include "lib/clock.h" 63 #include "lib/vxlan.h" 64 #include "lib/geneve.h" 65 #include "lib/devcom.h" 66 #include "lib/pci_vsc.h" 67 #include "diag/fw_tracer.h" 68 #include "ecpf.h" 69 #include "lib/hv_vhca.h" 70 #include "diag/rsc_dump.h" 71 #include "sf/vhca_event.h" 72 #include "sf/dev/dev.h" 73 #include "sf/sf.h" 74 #include "mlx5_irq.h" 75 #include "hwmon.h" 76 #include "lag/lag.h" 77 #include "sh_devlink.h" 78 79 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 80 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); 81 MODULE_LICENSE("Dual BSD/GPL"); 82 83 unsigned int mlx5_core_debug_mask; 84 module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); 85 MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); 86 87 static unsigned int prof_sel = MLX5_DEFAULT_PROF; 88 module_param_named(prof_sel, prof_sel, uint, 0444); 89 MODULE_PARM_DESC(prof_sel, "profile selector. 
Valid range 0 - 2"); 90 91 static u32 sw_owner_id[4]; 92 #define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) 93 static DEFINE_IDA(sw_vhca_ida); 94 95 enum { 96 MLX5_ATOMIC_REQ_MODE_BE = 0x0, 97 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, 98 }; 99 100 #define LOG_MAX_SUPPORTED_QPS 0xff 101 102 static struct mlx5_profile profile[] = { 103 [0] = { 104 .mask = 0, 105 .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, 106 }, 107 [1] = { 108 .mask = MLX5_PROF_MASK_QP_SIZE, 109 .log_max_qp = 12, 110 .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, 111 112 }, 113 [2] = { 114 .mask = MLX5_PROF_MASK_QP_SIZE | 115 MLX5_PROF_MASK_MR_CACHE, 116 .log_max_qp = LOG_MAX_SUPPORTED_QPS, 117 .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, 118 .mr_cache[0] = { 119 .size = 500, 120 .limit = 250 121 }, 122 .mr_cache[1] = { 123 .size = 500, 124 .limit = 250 125 }, 126 .mr_cache[2] = { 127 .size = 500, 128 .limit = 250 129 }, 130 .mr_cache[3] = { 131 .size = 500, 132 .limit = 250 133 }, 134 .mr_cache[4] = { 135 .size = 500, 136 .limit = 250 137 }, 138 .mr_cache[5] = { 139 .size = 500, 140 .limit = 250 141 }, 142 .mr_cache[6] = { 143 .size = 500, 144 .limit = 250 145 }, 146 .mr_cache[7] = { 147 .size = 500, 148 .limit = 250 149 }, 150 .mr_cache[8] = { 151 .size = 500, 152 .limit = 250 153 }, 154 .mr_cache[9] = { 155 .size = 500, 156 .limit = 250 157 }, 158 .mr_cache[10] = { 159 .size = 500, 160 .limit = 250 161 }, 162 .mr_cache[11] = { 163 .size = 500, 164 .limit = 250 165 }, 166 .mr_cache[12] = { 167 .size = 64, 168 .limit = 32 169 }, 170 .mr_cache[13] = { 171 .size = 32, 172 .limit = 16 173 }, 174 .mr_cache[14] = { 175 .size = 16, 176 .limit = 8 177 }, 178 .mr_cache[15] = { 179 .size = 8, 180 .limit = 4 181 }, 182 }, 183 [3] = { 184 .mask = MLX5_PROF_MASK_QP_SIZE, 185 .log_max_qp = LOG_MAX_SUPPORTED_QPS, 186 .num_cmd_caches = 0, 187 }, 188 }; 189 190 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, 191 u32 warn_time_mili, const char *init_state) 192 { 193 unsigned 
long warn = jiffies + msecs_to_jiffies(warn_time_mili); 194 unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); 195 u32 fw_initializing; 196 197 do { 198 fw_initializing = ioread32be(&dev->iseg->initializing); 199 if (!(fw_initializing >> 31)) 200 break; 201 if (time_after(jiffies, end)) { 202 mlx5_core_err(dev, "Firmware over %u MS in %s state, aborting\n", 203 max_wait_mili, init_state); 204 return -ETIMEDOUT; 205 } 206 if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { 207 mlx5_core_warn(dev, "device is being removed, stop waiting for FW %s\n", 208 init_state); 209 return -ENODEV; 210 } 211 if (warn_time_mili && time_after(jiffies, warn)) { 212 mlx5_core_warn(dev, "Waiting for FW %s, timeout abort in %ds (0x%x)\n", 213 init_state, jiffies_to_msecs(end - warn) / 1000, 214 fw_initializing); 215 warn = jiffies + msecs_to_jiffies(warn_time_mili); 216 } 217 msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT)); 218 } while (true); 219 220 return 0; 221 } 222 223 static void mlx5_set_driver_version(struct mlx5_core_dev *dev) 224 { 225 int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in, 226 driver_version); 227 u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {}; 228 char *string; 229 230 if (!MLX5_CAP_GEN(dev, driver_version)) 231 return; 232 233 string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version); 234 235 snprintf(string, driver_ver_sz, "Linux,%s,%u.%u.%u", 236 KBUILD_MODNAME, LINUX_VERSION_MAJOR, 237 LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL); 238 239 /*Send the command*/ 240 MLX5_SET(set_driver_version_in, in, opcode, 241 MLX5_CMD_OP_SET_DRIVER_VERSION); 242 243 mlx5_cmd_exec_in(dev, set_driver_version, in); 244 } 245 246 static int set_dma_caps(struct pci_dev *pdev) 247 { 248 int err; 249 250 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 251 if (err) { 252 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n"); 253 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); 254 if (err) { 255 
dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n"); 256 return err; 257 } 258 } 259 260 dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); 261 return err; 262 } 263 264 static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) 265 { 266 struct pci_dev *pdev = dev->pdev; 267 int err = 0; 268 269 mutex_lock(&dev->pci_status_mutex); 270 if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { 271 err = pci_enable_device(pdev); 272 if (!err) 273 dev->pci_status = MLX5_PCI_STATUS_ENABLED; 274 } 275 mutex_unlock(&dev->pci_status_mutex); 276 277 return err; 278 } 279 280 static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) 281 { 282 struct pci_dev *pdev = dev->pdev; 283 284 mutex_lock(&dev->pci_status_mutex); 285 if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { 286 pci_disable_device(pdev); 287 dev->pci_status = MLX5_PCI_STATUS_DISABLED; 288 } 289 mutex_unlock(&dev->pci_status_mutex); 290 } 291 292 static int request_bar(struct pci_dev *pdev) 293 { 294 int err = 0; 295 296 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 297 dev_err(&pdev->dev, "Missing registers BAR, aborting\n"); 298 return -ENODEV; 299 } 300 301 err = pci_request_regions(pdev, KBUILD_MODNAME); 302 if (err) 303 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); 304 305 return err; 306 } 307 308 static void release_bar(struct pci_dev *pdev) 309 { 310 pci_release_regions(pdev); 311 } 312 313 struct mlx5_reg_host_endianness { 314 u8 he; 315 u8 rsvd[15]; 316 }; 317 318 static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) 319 { 320 switch (size) { 321 case 128: 322 return 0; 323 case 256: 324 return 1; 325 case 512: 326 return 2; 327 case 1024: 328 return 3; 329 case 2048: 330 return 4; 331 case 4096: 332 return 5; 333 default: 334 mlx5_core_warn(dev, "invalid pkey table size %d\n", size); 335 return 0; 336 } 337 } 338 339 void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *dev, struct net_device *netdev) 340 { 341 
mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); 342 dev->mlx5e_res.uplink_netdev = netdev; 343 mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, 344 netdev); 345 mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); 346 } 347 348 void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) 349 { 350 mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); 351 mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, 352 dev->mlx5e_res.uplink_netdev); 353 mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); 354 } 355 EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); 356 357 void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data) 358 { 359 mlx5_blocking_notifier_call_chain(dev, event, data); 360 } 361 EXPORT_SYMBOL(mlx5_core_mp_event_replay); 362 363 int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, 364 enum mlx5_cap_mode cap_mode) 365 { 366 u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; 367 int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); 368 void *out, *hca_caps; 369 u16 opmod = (cap_type << 1) | (cap_mode & 0x01); 370 int err; 371 372 if (WARN_ON(!dev->caps.hca[cap_type])) 373 /* this cap_type must be added to mlx5_hca_caps_alloc() */ 374 return -EINVAL; 375 376 memset(in, 0, sizeof(in)); 377 out = kzalloc(out_sz, GFP_KERNEL); 378 if (!out) 379 return -ENOMEM; 380 381 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 382 MLX5_SET(query_hca_cap_in, in, op_mod, opmod); 383 err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); 384 if (err) { 385 mlx5_core_warn(dev, 386 "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", 387 cap_type, cap_mode, err); 388 goto query_ex; 389 } 390 391 hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); 392 393 switch (cap_mode) { 394 case HCA_CAP_OPMOD_GET_MAX: 395 memcpy(dev->caps.hca[cap_type]->max, hca_caps, 396 MLX5_UN_SZ_BYTES(hca_cap_union)); 397 break; 398 case HCA_CAP_OPMOD_GET_CUR: 399 
memcpy(dev->caps.hca[cap_type]->cur, hca_caps, 400 MLX5_UN_SZ_BYTES(hca_cap_union)); 401 break; 402 default: 403 mlx5_core_warn(dev, 404 "Tried to query dev cap type(%x) with wrong opmode(%x)\n", 405 cap_type, cap_mode); 406 err = -EINVAL; 407 break; 408 } 409 query_ex: 410 kfree(out); 411 return err; 412 } 413 414 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) 415 { 416 int ret; 417 418 ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); 419 if (ret) 420 return ret; 421 return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); 422 } 423 424 static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod) 425 { 426 MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); 427 MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); 428 return mlx5_cmd_exec_in(dev, set_hca_cap, in); 429 } 430 431 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx) 432 { 433 void *set_hca_cap; 434 int req_endianness; 435 int err; 436 437 if (!MLX5_CAP_GEN(dev, atomic)) 438 return 0; 439 440 err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); 441 if (err) 442 return err; 443 444 req_endianness = 445 MLX5_CAP_ATOMIC(dev, 446 supported_atomic_req_8B_endianness_mode_1); 447 448 if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) 449 return 0; 450 451 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 452 453 /* Set requestor to host endianness */ 454 MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode, 455 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); 456 457 return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); 458 } 459 460 static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) 461 { 462 bool do_set = false, mem_page_fault = false; 463 void *set_hca_cap; 464 int err; 465 466 if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || 467 !MLX5_CAP_GEN(dev, pg)) 468 return 0; 469 470 err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); 471 if (err) 472 return err; 
473 474 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 475 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur, 476 MLX5_ST_SZ_BYTES(odp_cap)); 477 478 /* For best performance, enable memory scheme ODP only when 479 * it has page prefetch enabled. 480 */ 481 if (MLX5_CAP_ODP_MAX(dev, mem_page_fault) && 482 MLX5_CAP_ODP_MAX(dev, memory_page_fault_scheme_cap.page_prefetch)) { 483 mem_page_fault = true; 484 do_set = true; 485 MLX5_SET(odp_cap, set_hca_cap, mem_page_fault, mem_page_fault); 486 goto set; 487 } 488 489 #define ODP_CAP_SET_MAX(dev, field) \ 490 do { \ 491 u32 _res = MLX5_CAP_ODP_MAX(dev, field); \ 492 if (_res) { \ 493 do_set = true; \ 494 MLX5_SET(odp_cap, set_hca_cap, field, _res); \ 495 } \ 496 } while (0) 497 498 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.ud_odp_caps.srq_receive); 499 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.rc_odp_caps.srq_receive); 500 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.srq_receive); 501 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.send); 502 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.receive); 503 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.write); 504 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.read); 505 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.atomic); 506 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.srq_receive); 507 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.send); 508 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.receive); 509 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.write); 510 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.read); 511 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.atomic); 512 513 set: 514 if (do_set) 515 err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); 516 517 mlx5_core_dbg(dev, "Using ODP %s 
scheme\n", 518 mem_page_fault ? "memory" : "transport"); 519 return err; 520 } 521 522 static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) 523 { 524 struct devlink *devlink = priv_to_devlink(dev); 525 union devlink_param_value val; 526 int err; 527 528 err = devl_param_driverinit_value_get(devlink, 529 DEVLINK_PARAM_GENERIC_ID_MAX_MACS, 530 &val); 531 if (!err) 532 return val.vu32; 533 mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); 534 return err; 535 } 536 537 bool mlx5_is_roce_on(struct mlx5_core_dev *dev) 538 { 539 struct devlink *devlink = priv_to_devlink(dev); 540 union devlink_param_value val; 541 int err; 542 543 err = devl_param_driverinit_value_get(devlink, 544 DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, 545 &val); 546 547 if (!err) 548 return val.vbool; 549 550 mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); 551 return MLX5_CAP_GEN(dev, roce); 552 } 553 EXPORT_SYMBOL(mlx5_is_roce_on); 554 555 static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) 556 { 557 bool do_set = false; 558 void *set_hca_cap; 559 int err; 560 561 if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) 562 return 0; 563 564 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); 565 if (err) 566 return err; 567 568 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, 569 capability); 570 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, 571 MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); 572 573 if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && 574 dev->priv.sw_vhca_id > 0) { 575 MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); 576 do_set = true; 577 } 578 579 if (MLX5_CAP_GEN_2_MAX(dev, lag_per_mp_group)) { 580 MLX5_SET(cmd_hca_cap_2, set_hca_cap, lag_per_mp_group, 1); 581 do_set = true; 582 } 583 584 /* some FW versions that support querying MLX5_CAP_GENERAL_2 585 * capabilities but don't support setting them. 586 * Skip unnecessary update to hca_cap_2 when no changes were introduced 587 */ 588 return do_set ? 
set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2) : 0; 589 } 590 591 static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) 592 { 593 struct mlx5_profile *prof = &dev->profile; 594 void *set_hca_cap; 595 int max_uc_list; 596 int err; 597 598 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); 599 if (err) 600 return err; 601 602 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, 603 capability); 604 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur, 605 MLX5_ST_SZ_BYTES(cmd_hca_cap)); 606 607 mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", 608 mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), 609 128); 610 /* we limit the size of the pkey table to 128 entries for now */ 611 MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, 612 to_fw_pkey_sz(dev, 128)); 613 614 /* Check log_max_qp from HCA caps to set in current profile */ 615 if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { 616 prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp)); 617 } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { 618 mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", 619 prof->log_max_qp, 620 MLX5_CAP_GEN_MAX(dev, log_max_qp)); 621 prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); 622 } 623 if (prof->mask & MLX5_PROF_MASK_QP_SIZE) 624 MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, 625 prof->log_max_qp); 626 627 /* disable cmdif checksum */ 628 MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); 629 630 /* Enable 4K UAR only when HCA supports it and page size is bigger 631 * than 4K. 632 */ 633 if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096) 634 MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); 635 636 MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); 637 638 if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte)) 639 MLX5_SET(cmd_hca_cap, 640 set_hca_cap, 641 cache_line_128byte, 642 cache_line_size() >= 128 ? 
1 : 0); 643 644 if (MLX5_CAP_GEN_MAX(dev, dct)) 645 MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); 646 647 if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) 648 MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); 649 if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload)) 650 MLX5_SET(cmd_hca_cap, set_hca_cap, 651 pci_sync_for_fw_update_with_driver_unload, 1); 652 if (MLX5_CAP_GEN_MAX(dev, pcie_reset_using_hotreset_method)) 653 MLX5_SET(cmd_hca_cap, set_hca_cap, 654 pcie_reset_using_hotreset_method, 1); 655 656 if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) 657 MLX5_SET(cmd_hca_cap, 658 set_hca_cap, 659 num_vhca_ports, 660 MLX5_CAP_GEN_MAX(dev, num_vhca_ports)); 661 662 if (MLX5_CAP_GEN_MAX(dev, release_all_pages)) 663 MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1); 664 665 if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) 666 MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); 667 668 mlx5_vhca_state_cap_handle(dev, set_hca_cap); 669 670 if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)) 671 MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, 672 MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); 673 674 if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) 675 MLX5_SET(cmd_hca_cap, set_hca_cap, roce, 676 mlx5_is_roce_on(dev)); 677 678 max_uc_list = max_uc_list_get_devlink_param(dev); 679 if (max_uc_list > 0) 680 MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, 681 ilog2(max_uc_list)); 682 683 /* enable absolute native port num */ 684 if (MLX5_CAP_GEN_MAX(dev, abs_native_port_num)) 685 MLX5_SET(cmd_hca_cap, set_hca_cap, abs_native_port_num, 1); 686 687 return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); 688 } 689 690 /* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the 691 * boot process. 692 * In case RoCE cap is writable in FW and user/devlink requested to change the 693 * cap, we are yet to query the final state of the above cap. 
694 * Hence, the need for this function. 695 * 696 * Returns 697 * True: 698 * 1) RoCE cap is read only in FW and already disabled 699 * OR: 700 * 2) RoCE cap is writable in FW and user/devlink requested it off. 701 * 702 * In any other case, return False. 703 */ 704 static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) 705 { 706 return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || 707 (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); 708 } 709 710 static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) 711 { 712 void *set_hca_cap; 713 int err; 714 715 if (is_roce_fw_disabled(dev)) 716 return 0; 717 718 err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); 719 if (err) 720 return err; 721 722 if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) || 723 !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port)) 724 return 0; 725 726 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 727 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur, 728 MLX5_ST_SZ_BYTES(roce_cap)); 729 MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1); 730 731 if (MLX5_CAP_ROCE_MAX(dev, qp_ooo_transmit_default)) 732 MLX5_SET(roce_cap, set_hca_cap, qp_ooo_transmit_default, 1); 733 734 err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE); 735 return err; 736 } 737 738 static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev, 739 void *set_ctx) 740 { 741 void *set_hca_cap; 742 int err; 743 744 if (!MLX5_CAP_GEN(dev, port_selection_cap)) 745 return 0; 746 747 err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); 748 if (err) 749 return err; 750 751 if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) || 752 !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass)) 753 return 0; 754 755 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 756 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, 757 MLX5_ST_SZ_BYTES(port_selection_cap)); 758 MLX5_SET(port_selection_cap, 
set_hca_cap, port_select_flow_table_bypass, 1); 759 760 err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION); 761 762 return err; 763 } 764 765 static int set_hca_cap(struct mlx5_core_dev *dev) 766 { 767 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); 768 void *set_ctx; 769 int err; 770 771 set_ctx = kzalloc(set_sz, GFP_KERNEL); 772 if (!set_ctx) 773 return -ENOMEM; 774 775 err = handle_hca_cap(dev, set_ctx); 776 if (err) { 777 mlx5_core_err(dev, "handle_hca_cap failed\n"); 778 goto out; 779 } 780 781 memset(set_ctx, 0, set_sz); 782 err = handle_hca_cap_atomic(dev, set_ctx); 783 if (err) { 784 mlx5_core_err(dev, "handle_hca_cap_atomic failed\n"); 785 goto out; 786 } 787 788 memset(set_ctx, 0, set_sz); 789 err = handle_hca_cap_odp(dev, set_ctx); 790 if (err) { 791 mlx5_core_err(dev, "handle_hca_cap_odp failed\n"); 792 goto out; 793 } 794 795 memset(set_ctx, 0, set_sz); 796 err = handle_hca_cap_roce(dev, set_ctx); 797 if (err) { 798 mlx5_core_err(dev, "handle_hca_cap_roce failed\n"); 799 goto out; 800 } 801 802 memset(set_ctx, 0, set_sz); 803 err = handle_hca_cap_2(dev, set_ctx); 804 if (err) { 805 mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); 806 goto out; 807 } 808 809 memset(set_ctx, 0, set_sz); 810 err = handle_hca_cap_port_selection(dev, set_ctx); 811 if (err) { 812 mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n"); 813 goto out; 814 } 815 816 out: 817 kfree(set_ctx); 818 return err; 819 } 820 821 static int set_hca_ctrl(struct mlx5_core_dev *dev) 822 { 823 struct mlx5_reg_host_endianness he_in; 824 struct mlx5_reg_host_endianness he_out; 825 int err; 826 827 if (!mlx5_core_is_pf(dev)) 828 return 0; 829 830 memset(&he_in, 0, sizeof(he_in)); 831 he_in.he = MLX5_SET_HOST_ENDIANNESS; 832 err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), 833 &he_out, sizeof(he_out), 834 MLX5_REG_HOST_ENDIANNESS, 0, 1); 835 return err; 836 } 837 838 static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) 839 { 840 int ret = 0; 841 842 /* 
Disable local_lb by default */ 843 if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) 844 ret = mlx5_nic_vport_update_local_lb(dev, false); 845 846 return ret; 847 } 848 849 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) 850 { 851 u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; 852 853 MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); 854 MLX5_SET(enable_hca_in, in, function_id, func_id); 855 MLX5_SET(enable_hca_in, in, embedded_cpu_function, 856 dev->caps.embedded_cpu); 857 return mlx5_cmd_exec_in(dev, enable_hca, in); 858 } 859 860 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) 861 { 862 u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; 863 864 MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); 865 MLX5_SET(disable_hca_in, in, function_id, func_id); 866 MLX5_SET(enable_hca_in, in, embedded_cpu_function, 867 dev->caps.embedded_cpu); 868 return mlx5_cmd_exec_in(dev, disable_hca, in); 869 } 870 871 static int mlx5_core_set_issi(struct mlx5_core_dev *dev) 872 { 873 u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {}; 874 u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {}; 875 u32 sup_issi; 876 int err; 877 878 MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); 879 err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out); 880 if (err) { 881 u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome); 882 u8 status = MLX5_GET(query_issi_out, query_out, status); 883 884 if (!status || syndrome == MLX5_DRIVER_SYND) { 885 mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", 886 err, status, syndrome); 887 return err; 888 } 889 890 mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n"); 891 dev->issi = 0; 892 return 0; 893 } 894 895 sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); 896 897 if (sup_issi & (1 << 1)) { 898 u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {}; 899 900 MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); 901 
MLX5_SET(set_issi_in, set_in, current_issi, 1); 902 err = mlx5_cmd_exec_in(dev, set_issi, set_in); 903 if (err) { 904 mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n", 905 err); 906 return err; 907 } 908 909 dev->issi = 1; 910 911 return 0; 912 } else if (sup_issi & (1 << 0) || !sup_issi) { 913 return 0; 914 } 915 916 return -EOPNOTSUPP; 917 } 918 919 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, 920 const struct pci_device_id *id) 921 { 922 int err = 0; 923 924 mutex_init(&dev->pci_status_mutex); 925 pci_set_drvdata(dev->pdev, dev); 926 927 dev->bar_addr = pci_resource_start(pdev, 0); 928 929 err = mlx5_pci_enable_device(dev); 930 if (err) { 931 mlx5_core_err(dev, "Cannot enable PCI device, aborting\n"); 932 return err; 933 } 934 935 err = request_bar(pdev); 936 if (err) { 937 mlx5_core_err(dev, "error requesting BARs, aborting\n"); 938 goto err_disable; 939 } 940 941 pci_set_master(pdev); 942 943 err = set_dma_caps(pdev); 944 if (err) { 945 mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n"); 946 goto err_clr_master; 947 } 948 949 if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && 950 pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && 951 pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) 952 mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); 953 954 dev->iseg = ioremap(dev->bar_addr, sizeof(*dev->iseg)); 955 if (!dev->iseg) { 956 err = -ENOMEM; 957 mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n"); 958 goto err_clr_master; 959 } 960 961 mlx5_pci_vsc_init(dev); 962 963 pci_enable_ptm(pdev); 964 965 return 0; 966 967 err_clr_master: 968 release_bar(dev->pdev); 969 err_disable: 970 mlx5_pci_disable_device(dev); 971 return err; 972 } 973 974 static void mlx5_pci_close(struct mlx5_core_dev *dev) 975 { 976 /* health work might still be active, and it needs pci bar in 977 * order to know the NIC state. 
Therefore, drain the health WQ
	 * before removing the pci bars
	 */
	mlx5_drain_health_wq(dev);
	pci_disable_ptm(dev->pdev);
	iounmap(dev->iseg);
	release_bar(dev->pdev);
	mlx5_pci_disable_device(dev);
}

/* Initialize the per-device software objects, one step after another.
 *
 * A failing step logs an error and jumps into the unwind ladder at the
 * bottom, which cleans up the previously initialized objects in reverse
 * order and returns the error code of the failing step.
 *
 * A failed devcom registration is only warned about. The results of the
 * vxlan/geneve and dm/st/tracer/hv_vhca/rsc_dump create calls are stored in
 * @dev without being checked here.
 */
static int mlx5_init_once(struct mlx5_core_dev *dev)
{
	int err;

	dev->priv.devc = mlx5_devcom_register_device(dev);
	if (!dev->priv.devc)
		mlx5_core_warn(dev, "failed to register devcom device\n");

	err = mlx5_query_board_id(dev);
	if (err) {
		mlx5_core_err(dev, "query board id failed\n");
		goto err_devcom;
	}

	err = mlx5_irq_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize irq table\n");
		goto err_devcom;
	}

	err = mlx5_eq_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize eq\n");
		goto err_irq_cleanup;
	}

	err = mlx5_fw_reset_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize fw reset events\n");
		goto err_eq_cleanup;
	}

	mlx5_cq_debugfs_init(dev);

	mlx5_init_reserved_gids(dev);

	err = mlx5_init_clock(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize hardware clock\n");
		goto err_tables_cleanup;
	}

	dev->vxlan = mlx5_vxlan_create(dev);
	dev->geneve = mlx5_geneve_create(dev);

	err = mlx5_init_rl_table(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init rate limiting\n");
		goto err_clock_cleanup;
	}

	err = mlx5_mpfs_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init l2 table %d\n", err);
		goto err_rl_cleanup;
	}

	err = mlx5_sriov_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init sriov %d\n", err);
		goto err_mpfs_cleanup;
	}

	err = mlx5_eswitch_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init eswitch %d\n", err);
		goto err_sriov_cleanup;
	}

	err = mlx5_fpga_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init fpga device %d\n", err);
		goto err_eswitch_cleanup;
	}

	err = mlx5_vhca_event_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err);
		goto err_fpga_cleanup;
	}

	/* NOTE: from here on each label is named after the step that failed,
	 * not after the cleanup it performs; the code under a label undoes
	 * the steps that preceded the failing one.
	 */
	err = mlx5_sf_hw_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init SF HW table %d\n", err);
		goto err_sf_hw_table_cleanup;
	}

	err = mlx5_sf_table_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init SF table %d\n", err);
		goto err_sf_table_cleanup;
	}

	err = mlx5_fs_core_alloc(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc flow steering\n");
		goto err_fs;
	}

	dev->dm = mlx5_dm_create(dev);
	dev->st = mlx5_st_create(dev);
	dev->tracer = mlx5_fw_tracer_create(dev);
	dev->hv_vhca = mlx5_hv_vhca_create(dev);
	dev->rsc_dump = mlx5_rsc_dump_create(dev);

	return 0;

err_fs:
	mlx5_sf_table_cleanup(dev);
err_sf_table_cleanup:
	mlx5_sf_hw_table_cleanup(dev);
err_sf_hw_table_cleanup:
	mlx5_vhca_event_cleanup(dev);
err_fpga_cleanup:
	mlx5_fpga_cleanup(dev);
err_eswitch_cleanup:
	mlx5_eswitch_cleanup(dev->priv.eswitch);
err_sriov_cleanup:
	mlx5_sriov_cleanup(dev);
err_mpfs_cleanup:
	mlx5_mpfs_cleanup(dev);
err_rl_cleanup:
	mlx5_cleanup_rl_table(dev);
err_clock_cleanup:
	mlx5_geneve_destroy(dev->geneve);
	mlx5_vxlan_destroy(dev->vxlan);
	mlx5_cleanup_clock(dev);
err_tables_cleanup:
	mlx5_cleanup_reserved_gids(dev);
	mlx5_cq_debugfs_cleanup(dev);
	mlx5_fw_reset_cleanup(dev);
err_eq_cleanup:
	mlx5_eq_table_cleanup(dev);
err_irq_cleanup:
	mlx5_irq_table_cleanup(dev);
err_devcom:
	mlx5_devcom_unregister_device(dev->priv.devc);

	return err;
}

/* Release everything mlx5_init_once() set up, in the reverse order of
 * its initialization sequence.
 */
static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
	mlx5_rsc_dump_destroy(dev);
	mlx5_hv_vhca_destroy(dev->hv_vhca);
	mlx5_fw_tracer_destroy(dev->tracer);
	mlx5_st_destroy(dev);
	mlx5_dm_cleanup(dev);
	mlx5_fs_core_free(dev);
	mlx5_sf_table_cleanup(dev);
	mlx5_sf_hw_table_cleanup(dev);
	mlx5_vhca_event_cleanup(dev);
	mlx5_fpga_cleanup(dev);
	mlx5_eswitch_cleanup(dev->priv.eswitch);
	mlx5_sriov_cleanup(dev);
	mlx5_mpfs_cleanup(dev);
	mlx5_cleanup_rl_table(dev);
	mlx5_geneve_destroy(dev->geneve);
	mlx5_vxlan_destroy(dev->vxlan);
	mlx5_cleanup_clock(dev);
	mlx5_cleanup_reserved_gids(dev);
	mlx5_cq_debugfs_cleanup(dev);
	mlx5_fw_reset_cleanup(dev);
	mlx5_eq_table_cleanup(dev);
	mlx5_irq_table_cleanup(dev);
	mlx5_devcom_unregister_device(dev->priv.devc);
}

/* Bring the function up to the point where the HCA is enabled and boot
 * pages are provided.
 *
 * @boot is only forwarded to mlx5_stop_health_poll() on the error path.
 * @timeout is the pre-initialization wait passed to the first
 * wait_fw_init() call.
 *
 * Sequence: wait for FW pre-init, enable the command interface, wait for FW
 * init, mark the command interface up, enable the HCA, start health polling,
 * set ISSI, satisfy boot pages and query DTOR timeouts. On failure, the steps
 * already done are undone and the error of the failing step is returned.
 */
static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout)
{
	int err;

	mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev),
		       fw_rev_min(dev), fw_rev_sub(dev));

	/* Only PFs hold the relevant PCIe information for this query */
	if (mlx5_core_is_pf(dev))
		pcie_print_link_status(dev->pdev);

	/* wait for firmware to accept initialization segments configurations
	 */
	err = wait_fw_init(dev, timeout,
			   mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL),
			   "pre-initializing");
	if (err)
		return err;

	err = mlx5_cmd_enable(dev);
	if (err) {
		mlx5_core_err(dev, "Failed initializing command interface, aborting\n");
		return err;
	}

	mlx5_tout_query_iseg(dev);

	err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0, "initializing");
	if (err)
		goto err_cmd_cleanup;

	dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP);

	err = mlx5_core_enable_hca(dev, 0);
	if (err) {
		mlx5_core_err(dev, "enable hca failed\n");
		goto err_cmd_cleanup;
	}

	mlx5_start_health_poll(dev);

	err = mlx5_core_set_issi(dev);
	if (err) {
		mlx5_core_err(dev, "failed to set issi\n");
		goto stop_health_poll;
	}

	err = mlx5_satisfy_startup_pages(dev, 1);
	if (err) {
		mlx5_core_err(dev, "failed to allocate boot pages\n");
		goto stop_health_poll;
	}

	err = mlx5_tout_query_dtor(dev);
	if (err) {
		mlx5_core_err(dev, "failed to read dtor\n");
		goto reclaim_boot_pages;
	}

	return 0;

reclaim_boot_pages:
	mlx5_reclaim_startup_pages(dev);
stop_health_poll:
	/* this label also disables the HCA enabled above */
	mlx5_stop_health_poll(dev, boot);
	mlx5_core_disable_hca(dev, 0);
err_cmd_cleanup:
	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
	mlx5_cmd_disable(dev);

	return err;
}

/* Undo mlx5_function_enable(): reclaim startup pages, stop health polling,
 * disable the HCA and take the command interface down.
 */
static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot)
{
	mlx5_reclaim_startup_pages(dev);
	mlx5_stop_health_poll(dev, boot);
	mlx5_core_disable_hca(dev, 0);
	mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN);
	mlx5_cmd_disable(dev);
}

/* Configure and initialize the HCA: set HCA control and capabilities,
 * satisfy init pages, issue INIT_HCA, set the driver version and query the
 * HCA capabilities.
 *
 * Each failure is logged and returned as is; nothing is unwound in this
 * function.
 */
static int mlx5_function_open(struct mlx5_core_dev *dev)
{
	int err;

	err = set_hca_ctrl(dev);
	if (err) {
		mlx5_core_err(dev, "set_hca_ctrl failed\n");
		return err;
	}

	err = set_hca_cap(dev);
	if (err) {
		mlx5_core_err(dev, "set_hca_cap failed\n");
		return err;
	}

	err = mlx5_satisfy_startup_pages(dev, 0);
	if (err) {
		mlx5_core_err(dev, "failed to allocate init pages\n");
		return err;
	}

	err = mlx5_cmd_init_hca(dev, sw_owner_id);
	if (err) {
		mlx5_core_err(dev, "init hca failed\n");
		return err;
	}

	mlx5_set_driver_version(dev);

	err = mlx5_query_hca_caps(dev);
	if (err) {
		mlx5_core_err(dev, "query hca failed\n");
		return err;
	}
	mlx5_start_health_fw_log_up(dev);
	return 0;
}

/* Issue TEARDOWN_HCA; on failure log it and return the error so the caller
 * can skip the remaining cleanup.
 */
static int mlx5_function_close(struct mlx5_core_dev *dev)
{
	int err;

	err = mlx5_cmd_teardown_hca(dev);
	if (err) {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
		return err;
	}

	return 0;
}

/* Enable the function and then open it. If opening fails, the enable step
 * is undone with mlx5_function_disable().
 */
static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout)
{
	int err;

	err = mlx5_function_enable(dev, boot, timeout);
	if (err)
		return err;

	err = mlx5_function_open(dev);
	if (err)
		mlx5_function_disable(dev, boot);
	return err;
}

/* Close the function and disable it. If closing fails, only health polling
 * is stopped and the rest of the disable sequence is skipped.
 */
static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot)
{
	int err = mlx5_function_close(dev);

	if (!err)
		mlx5_function_disable(dev, boot);
	else
		mlx5_stop_health_poll(dev, boot);

	return err;
}

/* Create and start the runtime resources of the device (bfreg, events, page
 * allocator, IRQ/EQ tables, clock, FW tracer, FW reset events, hv_vhca,
 * resource dump, FPGA, flow steering, HCA defaults, vhca events, embedded
 * CPU, LAG, SR-IOV, SF device table, devlink traps).
 *
 * FW tracer and resource dump init failures are not fatal: the object is
 * destroyed, its pointer is cleared and loading continues. Any other failure
 * unwinds what was started and returns the error.
 */
static int mlx5_load(struct mlx5_core_dev *dev)
{
	int err;

	err = mlx5_alloc_bfreg(dev, &dev->priv.bfreg, false, false);
	if (err) {
		mlx5_core_err(dev, "Failed allocating bfreg, %d\n", err);
		return err;
	}

	mlx5_events_start(dev);
	mlx5_pagealloc_start(dev);

	err = mlx5_irq_table_create(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc IRQs\n");
		goto err_irq_table;
	}

	err = mlx5_eq_table_create(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to create EQs\n");
		goto err_eq_table;
	}

	mlx5_clock_load(dev);

	err = mlx5_fw_tracer_init(dev->tracer);
	if (err) {
		mlx5_core_err(dev, "Failed to init FW tracer %d\n", err);
		mlx5_fw_tracer_destroy(dev->tracer);
		dev->tracer = NULL;
	}

	mlx5_fw_reset_events_start(dev);
	mlx5_hv_vhca_init(dev->hv_vhca);

	err = mlx5_rsc_dump_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init Resource dump %d\n", err);
		mlx5_rsc_dump_destroy(dev);
		dev->rsc_dump = NULL;
	}

	err = mlx5_fpga_device_start(dev);
	if (err) {
		mlx5_core_err(dev, "fpga device start failed %d\n", err);
		goto err_fpga_start;
	}

	err = mlx5_fs_core_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init flow steering\n");
		goto err_fs;
	}

	err = mlx5_core_set_hca_defaults(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to set hca defaults\n");
		goto err_set_hca;
	}

	mlx5_vhca_event_start(dev);

	err = mlx5_ec_init(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to init embedded CPU\n");
		goto err_ec;
	}

	mlx5_lag_add_mdev(dev);
	err = mlx5_sriov_attach(dev);
	if (err) {
		mlx5_core_err(dev, "sriov init failed %d\n", err);
		goto err_sriov;
	}

	mlx5_sf_dev_table_create(dev);

	err = mlx5_devlink_traps_register(priv_to_devlink(dev));
	if (err)
		goto err_traps_reg;

	return 0;

err_traps_reg:
	mlx5_sf_dev_table_destroy(dev);
	mlx5_sriov_detach(dev);
err_sriov:
	mlx5_lag_remove_mdev(dev);
	mlx5_ec_cleanup(dev);
err_ec:
	mlx5_vhca_event_stop(dev);
err_set_hca:
	mlx5_fs_core_cleanup(dev);
err_fs:
	mlx5_fpga_device_stop(dev);
err_fpga_start:
	mlx5_rsc_dump_cleanup(dev);
	mlx5_hv_vhca_cleanup(dev->hv_vhca);
	mlx5_fw_reset_events_stop(dev);
	mlx5_fw_tracer_cleanup(dev->tracer);
	mlx5_clock_unload(dev);
	mlx5_eq_table_destroy(dev);
err_eq_table:
	mlx5_irq_table_destroy(dev);
err_irq_table:
	mlx5_pagealloc_stop(dev);
	mlx5_events_stop(dev);
	mlx5_free_bfreg(dev, &dev->priv.bfreg);
	return err;
}

/* Stop and destroy the runtime resources of the device. The eswitch is
 * disabled first; the remaining steps tear down what mlx5_load() started.
 *
 * NOTE(review): mlx5_sf_hw_table_destroy() is called here, but no matching
 * create call is visible in mlx5_load() - presumably the table is created
 * elsewhere; confirm.
 */
static void mlx5_unload(struct mlx5_core_dev *dev)
{
	mlx5_eswitch_disable(dev->priv.eswitch);
	mlx5_devlink_traps_unregister(priv_to_devlink(dev));
	mlx5_vhca_event_stop(dev);
	mlx5_sf_dev_table_destroy(dev);
	mlx5_sriov_detach(dev);
	mlx5_lag_remove_mdev(dev);
	mlx5_ec_cleanup(dev);
	mlx5_sf_hw_table_destroy(dev);
	mlx5_fs_core_cleanup(dev);
	mlx5_fpga_device_stop(dev);
	mlx5_rsc_dump_cleanup(dev);
	mlx5_hv_vhca_cleanup(dev->hv_vhca);
	mlx5_fw_reset_events_stop(dev);
	mlx5_fw_tracer_cleanup(dev->tracer);
	mlx5_clock_unload(dev);
	mlx5_eq_table_destroy(dev);
	mlx5_irq_table_destroy(dev);
	mlx5_pagealloc_stop(dev);
	mlx5_events_stop(dev);
	mlx5_free_bfreg(dev, &dev->priv.bfreg);
}

/* Full device initialization, performed under intf_state_mutex: function
 * setup, one-time SW object init, devlink params registration (skipped for a
 * lightweight device), load, and device registration.
 *
 * Failures of crdump enable and hwmon registration are only logged; the
 * function still returns 0. On any other failure the completed steps are
 * undone and the device state is set to MLX5_DEVICE_STATE_INTERNAL_ERROR.
 */
int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
{
	bool light_probe = mlx5_dev_is_lightweight(dev);
	int err = 0;

	mutex_lock(&dev->intf_state_mutex);
	dev->state = MLX5_DEVICE_STATE_UP;

	err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT));
	if (err)
		goto err_function;

	err = mlx5_init_once(dev);
	if (err) {
		mlx5_core_err(dev, "sw objs init failed\n");
		goto function_teardown;
	}

	/* In case of light_probe, mlx5_devlink is already registered.
	 * Hence, don't register devlink again.
	 */
	if (!light_probe) {
		err = mlx5_devlink_params_register(priv_to_devlink(dev));
		if (err)
			goto err_devlink_params_reg;
	}

	err = mlx5_load(dev);
	if (err)
		goto err_load;

	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

	err = mlx5_register_device(dev);
	if (err)
		goto err_register;

	err = mlx5_crdump_enable(dev);
	if (err)
		mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);

	err = mlx5_hwmon_dev_register(dev);
	if (err)
		mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);

	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_register:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
err_load:
	if (!light_probe)
		mlx5_devlink_params_unregister(priv_to_devlink(dev));
err_devlink_params_reg:
	mlx5_cleanup_once(dev);
function_teardown:
	mlx5_function_teardown(dev, true);
err_function:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

/* Take the devlink lock, link the devlink instance under dev->shd when one
 * is set, register devlink and run the full initialization. Devlink is
 * unregistered again if the initialization fails.
 */
int mlx5_init_one(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);
	int err;

	devl_lock(devlink);
	if (dev->shd) {
		err = devl_nested_devlink_set(dev->shd, devlink);
		if (err)
			goto unlock;
	}
	devl_register(devlink);
	err = mlx5_init_one_devl_locked(dev);
	if (err)
		devl_unregister(devlink);
unlock:
	devl_unlock(devlink);
	return err;
}

/* Counterpart of mlx5_init_one(). When the interface is not up, only the
 * devlink params and the one-time SW objects are released; unload and
 * function teardown are skipped.
 */
void mlx5_uninit_one(struct mlx5_core_dev *dev)
{
	struct devlink *devlink = priv_to_devlink(dev);

	devl_lock(devlink);
	mutex_lock(&dev->intf_state_mutex);

	mlx5_hwmon_dev_unregister(dev);
	mlx5_crdump_disable(dev);
	mlx5_unregister_device(dev);

	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
			       __func__);
		mlx5_devlink_params_unregister(priv_to_devlink(dev));
		mlx5_cleanup_once(dev);
		goto out;
	}

	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
	mlx5_devlink_params_unregister(priv_to_devlink(dev));
	mlx5_cleanup_once(dev);
	mlx5_function_teardown(dev, true);
out:
	mutex_unlock(&dev->intf_state_mutex);
	devl_unregister(devlink);
	devl_unlock(devlink);
}

/* Reload a device whose SW objects already exist: function setup, load and
 * attach. The caller must hold the devlink lock.
 *
 * @recovery selects the FW pre-init timeout used for the function setup.
 * Returns 0 without doing anything when the interface is already up.
 */
int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery)
{
	int err = 0;
	u64 timeout;

	devl_assert_locked(priv_to_devlink(dev));
	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "interface is up, NOP\n");
		goto out;
	}
	/* remove any previous indication of internal error */
	dev->state = MLX5_DEVICE_STATE_UP;

	if (recovery)
		timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT);
	else
		timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT);
	err = mlx5_function_setup(dev, false, timeout);
	if (err)
		goto err_function;

	err = mlx5_load(dev);
	if (err)
		goto err_load;

	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

	err = mlx5_attach_device(dev);
	if (err)
		goto err_attach;

	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_attach:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
err_load:
	mlx5_function_teardown(dev, false);
err_function:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
out:
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

/* Wrapper of mlx5_load_one_devl_locked() that takes the devlink lock. */
int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery)
{
	struct devlink *devlink = priv_to_devlink(dev);
	int ret;

	devl_lock(devlink);
	ret = mlx5_load_one_devl_locked(dev, recovery);
	devl_unlock(devlink);
	return ret;
}

/* Detach, unload and tear down the function. The caller must hold the
 * devlink lock. The detach is done unconditionally; unload and teardown are
 * skipped when the interface is already down.
 */
void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend)
{
	devl_assert_locked(priv_to_devlink(dev));
	mutex_lock(&dev->intf_state_mutex);

	mlx5_detach_device(dev, suspend);

	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "%s: interface is down, NOP\n",
			       __func__);
		goto out;
	}

	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mlx5_unload(dev);
	mlx5_function_teardown(dev, false);
out:
	mutex_unlock(&dev->intf_state_mutex);
}

/* Wrapper of mlx5_unload_one_devl_locked() that takes the devlink lock. */
void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend)
{
	struct devlink *devlink = priv_to_devlink(dev);

	devl_lock(devlink);
	mlx5_unload_one_devl_locked(dev, suspend);
	devl_unlock(devlink);
}

/* In case of light probe, we don't need a full query of hca_caps, but only the below caps.
 * A full query of hca_caps will be done when the device is reloaded.
1656 */ 1657 static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) 1658 { 1659 int err; 1660 1661 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); 1662 if (err) 1663 return err; 1664 1665 if (MLX5_CAP_GEN(dev, eth_net_offloads)) { 1666 err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ETHERNET_OFFLOADS, 1667 HCA_CAP_OPMOD_GET_CUR); 1668 if (err) 1669 return err; 1670 } 1671 1672 if (MLX5_CAP_GEN(dev, nic_flow_table) || 1673 MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { 1674 err = mlx5_core_get_caps_mode(dev, MLX5_CAP_FLOW_TABLE, 1675 HCA_CAP_OPMOD_GET_CUR); 1676 if (err) 1677 return err; 1678 } 1679 1680 if (MLX5_CAP_GEN_64(dev, general_obj_types) & 1681 MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { 1682 err = mlx5_core_get_caps_mode(dev, MLX5_CAP_VDPA_EMULATION, 1683 HCA_CAP_OPMOD_GET_CUR); 1684 if (err) 1685 return err; 1686 } 1687 1688 return 0; 1689 } 1690 1691 int mlx5_init_one_light(struct mlx5_core_dev *dev) 1692 { 1693 struct devlink *devlink = priv_to_devlink(dev); 1694 int err; 1695 1696 devl_lock(devlink); 1697 devl_register(devlink); 1698 dev->state = MLX5_DEVICE_STATE_UP; 1699 err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); 1700 if (err) { 1701 mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err); 1702 goto out; 1703 } 1704 1705 err = mlx5_query_hca_caps_light(dev); 1706 if (err) { 1707 mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err); 1708 goto query_hca_caps_err; 1709 } 1710 1711 err = mlx5_devlink_params_register(priv_to_devlink(dev)); 1712 if (err) { 1713 mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); 1714 goto query_hca_caps_err; 1715 } 1716 1717 devl_unlock(devlink); 1718 return 0; 1719 1720 query_hca_caps_err: 1721 mlx5_function_disable(dev, true); 1722 out: 1723 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 1724 devl_unregister(devlink); 1725 devl_unlock(devlink); 1726 return err; 1727 } 1728 1729 void mlx5_uninit_one_light(struct mlx5_core_dev *dev) 1730 { 1731 struct devlink 
*devlink = priv_to_devlink(dev); 1732 1733 devl_lock(devlink); 1734 mlx5_devlink_params_unregister(priv_to_devlink(dev)); 1735 devl_unregister(devlink); 1736 devl_unlock(devlink); 1737 if (dev->state != MLX5_DEVICE_STATE_UP) 1738 return; 1739 mlx5_function_disable(dev, true); 1740 } 1741 1742 /* xxx_light() function are used in order to configure the device without full 1743 * init (light init). e.g.: There isn't a point in reload a device to light state. 1744 * Hence, mlx5_load_one_light() isn't needed. 1745 */ 1746 1747 void mlx5_unload_one_light(struct mlx5_core_dev *dev) 1748 { 1749 if (dev->state != MLX5_DEVICE_STATE_UP) 1750 return; 1751 mlx5_function_disable(dev, false); 1752 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 1753 } 1754 1755 static const int types[] = { 1756 MLX5_CAP_GENERAL, 1757 MLX5_CAP_GENERAL_2, 1758 MLX5_CAP_ETHERNET_OFFLOADS, 1759 MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, 1760 MLX5_CAP_ODP, 1761 MLX5_CAP_ATOMIC, 1762 MLX5_CAP_ROCE, 1763 MLX5_CAP_IPOIB_OFFLOADS, 1764 MLX5_CAP_FLOW_TABLE, 1765 MLX5_CAP_ESWITCH_FLOW_TABLE, 1766 MLX5_CAP_ESWITCH, 1767 MLX5_CAP_QOS, 1768 MLX5_CAP_DEBUG, 1769 MLX5_CAP_DEV_MEM, 1770 MLX5_CAP_DEV_EVENT, 1771 MLX5_CAP_TLS, 1772 MLX5_CAP_VDPA_EMULATION, 1773 MLX5_CAP_IPSEC, 1774 MLX5_CAP_PORT_SELECTION, 1775 MLX5_CAP_PSP, 1776 MLX5_CAP_MACSEC, 1777 MLX5_CAP_ADV_VIRTUALIZATION, 1778 MLX5_CAP_CRYPTO, 1779 MLX5_CAP_SHAMPO, 1780 MLX5_CAP_ADV_RDMA, 1781 MLX5_CAP_TLP_EMULATION, 1782 }; 1783 1784 static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) 1785 { 1786 int type; 1787 int i; 1788 1789 for (i = 0; i < ARRAY_SIZE(types); i++) { 1790 type = types[i]; 1791 kfree(dev->caps.hca[type]); 1792 } 1793 } 1794 1795 static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev) 1796 { 1797 struct mlx5_hca_cap *cap; 1798 int type; 1799 int i; 1800 1801 for (i = 0; i < ARRAY_SIZE(types); i++) { 1802 cap = kzalloc_obj(*cap); 1803 if (!cap) 1804 goto err; 1805 type = types[i]; 1806 dev->caps.hca[type] = cap; 1807 } 1808 1809 return 0; 
1810 1811 err: 1812 mlx5_hca_caps_free(dev); 1813 return -ENOMEM; 1814 } 1815 1816 static int mlx5_notifiers_init(struct mlx5_core_dev *dev) 1817 { 1818 int err; 1819 1820 err = mlx5_events_init(dev); 1821 if (err) { 1822 mlx5_core_err(dev, "failed to initialize events\n"); 1823 return err; 1824 } 1825 1826 BLOCKING_INIT_NOTIFIER_HEAD(&dev->priv.esw_n_head); 1827 mlx5_vhca_state_notifier_init(dev); 1828 1829 err = mlx5_sf_hw_notifier_init(dev); 1830 if (err) 1831 goto err_sf_hw_notifier; 1832 1833 err = mlx5_sf_notifiers_init(dev); 1834 if (err) 1835 goto err_sf_notifiers; 1836 1837 err = mlx5_sf_dev_notifier_init(dev); 1838 if (err) 1839 goto err_sf_dev_notifier; 1840 1841 return 0; 1842 1843 err_sf_dev_notifier: 1844 mlx5_sf_notifiers_cleanup(dev); 1845 err_sf_notifiers: 1846 mlx5_sf_hw_notifier_cleanup(dev); 1847 err_sf_hw_notifier: 1848 mlx5_events_cleanup(dev); 1849 return err; 1850 } 1851 1852 static void mlx5_notifiers_cleanup(struct mlx5_core_dev *dev) 1853 { 1854 mlx5_sf_dev_notifier_cleanup(dev); 1855 mlx5_sf_notifiers_cleanup(dev); 1856 mlx5_sf_hw_notifier_cleanup(dev); 1857 mlx5_events_cleanup(dev); 1858 } 1859 1860 int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) 1861 { 1862 struct mlx5_priv *priv = &dev->priv; 1863 int err; 1864 1865 memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); 1866 lockdep_register_key(&dev->lock_key); 1867 mutex_init(&dev->intf_state_mutex); 1868 lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); 1869 mutex_init(&dev->mlx5e_res.uplink_netdev_lock); 1870 mutex_init(&dev->wc_state_lock); 1871 1872 mutex_init(&priv->bfregs.reg_head.lock); 1873 mutex_init(&priv->bfregs.wc_head.lock); 1874 INIT_LIST_HEAD(&priv->bfregs.reg_head.list); 1875 INIT_LIST_HEAD(&priv->bfregs.wc_head.list); 1876 1877 mutex_init(&priv->alloc_mutex); 1878 mutex_init(&priv->pgdir_mutex); 1879 INIT_LIST_HEAD(&priv->pgdir_list); 1880 1881 priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); 1882 priv->dbg.dbg_root = 
debugfs_create_dir(dev_name(dev->device), 1883 mlx5_debugfs_root); 1884 1885 INIT_LIST_HEAD(&priv->traps); 1886 1887 err = mlx5_cmd_init(dev); 1888 if (err) { 1889 mlx5_core_err(dev, "Failed initializing cmdif SW structs, aborting\n"); 1890 goto err_cmd_init; 1891 } 1892 1893 err = mlx5_tout_init(dev); 1894 if (err) { 1895 mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); 1896 goto err_timeout_init; 1897 } 1898 1899 err = mlx5_health_init(dev); 1900 if (err) 1901 goto err_health_init; 1902 1903 err = mlx5_pagealloc_init(dev); 1904 if (err) 1905 goto err_pagealloc_init; 1906 1907 err = mlx5_adev_init(dev); 1908 if (err) 1909 goto err_adev_init; 1910 1911 err = mlx5_hca_caps_alloc(dev); 1912 if (err) 1913 goto err_hca_caps; 1914 1915 err = mlx5_notifiers_init(dev); 1916 if (err) 1917 goto err_hca_caps; 1918 1919 /* The conjunction of sw_vhca_id with sw_owner_id will be a global 1920 * unique id per function which uses mlx5_core. 1921 * Those values are supplied to FW as part of the init HCA command to 1922 * be used by both driver and FW when it's applicable. 
1923 */ 1924 dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, 1925 MAX_SW_VHCA_ID, 1926 GFP_KERNEL); 1927 if (dev->priv.sw_vhca_id < 0) 1928 mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", 1929 dev->priv.sw_vhca_id); 1930 1931 return 0; 1932 1933 err_hca_caps: 1934 mlx5_adev_cleanup(dev); 1935 err_adev_init: 1936 mlx5_pagealloc_cleanup(dev); 1937 err_pagealloc_init: 1938 mlx5_health_cleanup(dev); 1939 err_health_init: 1940 mlx5_tout_cleanup(dev); 1941 err_timeout_init: 1942 mlx5_cmd_cleanup(dev); 1943 err_cmd_init: 1944 debugfs_remove(dev->priv.dbg.dbg_root); 1945 mutex_destroy(&priv->pgdir_mutex); 1946 mutex_destroy(&priv->alloc_mutex); 1947 mutex_destroy(&priv->bfregs.wc_head.lock); 1948 mutex_destroy(&priv->bfregs.reg_head.lock); 1949 mutex_destroy(&dev->intf_state_mutex); 1950 lockdep_unregister_key(&dev->lock_key); 1951 return err; 1952 } 1953 1954 void mlx5_mdev_uninit(struct mlx5_core_dev *dev) 1955 { 1956 struct mlx5_priv *priv = &dev->priv; 1957 1958 if (priv->sw_vhca_id > 0) 1959 ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); 1960 1961 mlx5_notifiers_cleanup(dev); 1962 mlx5_hca_caps_free(dev); 1963 mlx5_adev_cleanup(dev); 1964 mlx5_pagealloc_cleanup(dev); 1965 mlx5_health_cleanup(dev); 1966 mlx5_tout_cleanup(dev); 1967 mlx5_cmd_cleanup(dev); 1968 debugfs_remove_recursive(dev->priv.dbg.dbg_root); 1969 mutex_destroy(&priv->pgdir_mutex); 1970 mutex_destroy(&priv->alloc_mutex); 1971 mutex_destroy(&priv->bfregs.wc_head.lock); 1972 mutex_destroy(&priv->bfregs.reg_head.lock); 1973 mutex_destroy(&dev->wc_state_lock); 1974 mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); 1975 mutex_destroy(&dev->intf_state_mutex); 1976 lockdep_unregister_key(&dev->lock_key); 1977 } 1978 1979 static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) 1980 { 1981 struct mlx5_core_dev *dev; 1982 struct devlink *devlink; 1983 int err; 1984 1985 devlink = mlx5_devlink_alloc(&pdev->dev); 1986 if (!devlink) { 1987 dev_err(&pdev->dev, "devlink alloc 
failed\n"); 1988 return -ENOMEM; 1989 } 1990 1991 dev = devlink_priv(devlink); 1992 dev->device = &pdev->dev; 1993 dev->pdev = pdev; 1994 1995 dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? 1996 MLX5_COREDEV_VF : MLX5_COREDEV_PF; 1997 1998 dev->priv.adev_idx = mlx5_adev_idx_alloc(); 1999 if (dev->priv.adev_idx < 0) { 2000 err = dev->priv.adev_idx; 2001 goto adev_init_err; 2002 } 2003 2004 err = mlx5_mdev_init(dev, prof_sel); 2005 if (err) 2006 goto mdev_init_err; 2007 2008 err = mlx5_pci_init(dev, pdev, id); 2009 if (err) { 2010 mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n", 2011 err); 2012 goto pci_init_err; 2013 } 2014 2015 err = mlx5_shd_init(dev); 2016 if (err) { 2017 mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n", 2018 err); 2019 goto shd_init_err; 2020 } 2021 2022 err = mlx5_init_one(dev); 2023 if (err) { 2024 mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n", 2025 err); 2026 goto err_init_one; 2027 } 2028 2029 mlx5_vhca_debugfs_init(dev); 2030 2031 pci_save_state(pdev); 2032 return 0; 2033 2034 err_init_one: 2035 mlx5_shd_uninit(dev); 2036 shd_init_err: 2037 mlx5_pci_close(dev); 2038 pci_init_err: 2039 mlx5_mdev_uninit(dev); 2040 mdev_init_err: 2041 mlx5_adev_idx_free(dev->priv.adev_idx); 2042 adev_init_err: 2043 mlx5_devlink_free(devlink); 2044 2045 return err; 2046 } 2047 2048 static void remove_one(struct pci_dev *pdev) 2049 { 2050 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2051 struct devlink *devlink = priv_to_devlink(dev); 2052 2053 set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); 2054 mlx5_drain_fw_reset(dev); 2055 mlx5_drain_health_wq(dev); 2056 mlx5_sriov_disable(pdev, false); 2057 mlx5_uninit_one(dev); 2058 mlx5_shd_uninit(dev); 2059 mlx5_pci_close(dev); 2060 mlx5_mdev_uninit(dev); 2061 mlx5_adev_idx_free(dev->priv.adev_idx); 2062 mlx5_devlink_free(devlink); 2063 } 2064 2065 #define mlx5_pci_trace(dev, fmt, ...) 
({ \ 2066 struct mlx5_core_dev *__dev = (dev); \ 2067 mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \ 2068 __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \ 2069 __dev->pci_status, ##__VA_ARGS__); \ 2070 }) 2071 2072 static const char *result2str(enum pci_ers_result result) 2073 { 2074 return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" : 2075 result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" : 2076 result == PCI_ERS_RESULT_RECOVERED ? "recovered" : 2077 "unknown"; 2078 } 2079 2080 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, 2081 pci_channel_state_t state) 2082 { 2083 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2084 enum pci_ers_result res; 2085 2086 mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state); 2087 2088 mlx5_enter_error_state(dev, false); 2089 mlx5_error_sw_reset(dev); 2090 mlx5_unload_one(dev, false); 2091 mlx5_drain_health_wq(dev); 2092 mlx5_pci_disable_device(dev); 2093 2094 res = state == pci_channel_io_perm_failure ? 2095 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; 2096 2097 mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", 2098 __func__, dev->state, dev->pci_status, res, result2str(res)); 2099 return res; 2100 } 2101 2102 /* wait for the device to show vital signs by waiting 2103 * for the health counter to start counting. 
 */
static int wait_vital(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_core_health *health = &dev->priv.health;
	const int niter = 100;
	u32 last_count = 0;
	u32 count;
	int i;

	/* Poll up to niter times, sleeping 50 ms between reads. Succeed once
	 * two valid readings (neither 0 nor 0xffffffff) differ.
	 */
	for (i = 0; i < niter; i++) {
		count = ioread32be(health->health_counter);
		if (count && count != 0xffffffff) {
			if (last_count && last_count != count) {
				mlx5_core_info(dev,
					       "wait vital counter value 0x%x after %d iterations\n",
					       count, i);
				return 0;
			}
			last_count = count;
		}
		msleep(50);
	}

	return -ETIMEDOUT;
}

/* PCI slot_reset callback: re-enable the PCI device, restore its saved
 * state and wait for the health counter to advance. Reports
 * PCI_ERS_RESULT_RECOVERED on success and PCI_ERS_RESULT_DISCONNECT when
 * any step fails.
 */
static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
	enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT;
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err;

	mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n",
		       __func__, dev->state, dev->pci_status);

	err = mlx5_pci_enable_device(dev);
	if (err) {
		mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n",
			      __func__, err);
		goto out;
	}

	pci_set_master(pdev);
	pci_restore_state(pdev);

	err = wait_vital(pdev);
	if (err) {
		mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n",
			      __func__, err);
		goto out;
	}

	res = PCI_ERS_RESULT_RECOVERED;
out:
	mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n",
		       __func__, dev->state, dev->pci_status, err, res, result2str(res));
	return res;
}

/* PCI resume callback: reload the device and, on success, mark the FW fatal
 * health reporter as healthy.
 */
static void mlx5_pci_resume(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err;

	mlx5_pci_trace(dev, "Enter, loading driver..\n");

	err = mlx5_load_one(dev, false);

	if (!err)
		devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter,
						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);

	mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err,
		       !err ? "recovered" : "Failed");
}

static const struct pci_error_handlers mlx5_err_handler = {
	.error_detected = mlx5_pci_err_detected,
	.slot_reset	= mlx5_pci_slot_reset,
	.resume		= mlx5_pci_resume
};

/* Try to tear the HCA down with the fast teardown command, falling back to
 * the force teardown command.
 *
 * Returns 0 on success, -EOPNOTSUPP when the firmware reports neither
 * capability, -EAGAIN when the device is in the internal error state, or the
 * error of the force teardown command, in which case health polling is
 * restarted.
 */
static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
{
	bool fast_teardown = false, force_teardown = false;
	int ret = 1;

	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
	force_teardown = MLX5_CAP_GEN(dev, force_teardown);

	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);

	if (!fast_teardown && !force_teardown)
		return -EOPNOTSUPP;

	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
		return -EAGAIN;
	}

	/* Panic tear down fw command will stop the PCI bus communication
	 * with the HCA, so the health poll is no longer needed.
	 */
	mlx5_stop_health_poll(dev, false);

	ret = mlx5_cmd_fast_teardown_hca(dev);
	if (!ret)
		goto succeed;

	ret = mlx5_cmd_force_teardown_hca(dev);
	if (!ret)
		goto succeed;

	mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
	mlx5_start_health_poll(dev);
	return ret;

succeed:
	mlx5_enter_error_state(dev, true);

	/* Some platforms require freeing the IRQs in the shutdown
	 * flow. If they aren't freed they can't be allocated after
	 * kexec. There is no need to clean up the mlx5_core software
	 * contexts.
	 */
	mlx5_core_eq_free_irqs(dev);

	return 0;
}

/* PCI shutdown callback: break pending FW waits, drain the FW reset and
 * health work, then try the fast unload and fall back to a regular unload
 * when it fails. The PCI device is disabled in both cases.
 */
static void shutdown(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
	int err;

	mlx5_core_info(dev, "Shutdown was called\n");
	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
	mlx5_drain_fw_reset(dev);
	mlx5_drain_health_wq(dev);
	err = mlx5_try_fast_unload(dev);
	if (err)
		mlx5_unload_one(dev, false);
	mlx5_pci_disable_device(dev);
}

/* PCI suspend callback: unload the device in suspend mode; always returns 0.
 * @state is not used.
 */
static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);

	mlx5_unload_one(dev, true);

	return 0;
}

/* PCI resume callback: reload the device (not as a recovery). */
static int mlx5_resume(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);

	return mlx5_load_one(dev, false);
}

/* Supported PCI device IDs; VF entries carry MLX5_PCI_DEV_IS_VF as driver
 * data, which probe_one() uses to select the core device type.
 */
static const struct pci_device_id mlx5_core_pci_table[] = {
	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) },
	{ PCI_VDEVICE(MELLANOX, 0x1012), MLX5_PCI_DEV_IS_VF},	/* Connect-IB VF */
	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) },
	{ PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4 VF */
	{ PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) },
	{ PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF},	/* ConnectX-4LX VF */
	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5 Ex */
	{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 Ex VF */
	{ PCI_VDEVICE(MELLANOX, 0x101b) },			/* ConnectX-6 */
	{ PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF},	/* ConnectX-6 VF */
	{ PCI_VDEVICE(MELLANOX, 0x101d) },			/* ConnectX-6 Dx */
	{ PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF},	/* ConnectX Family mlx5Gen Virtual Function */
	{ PCI_VDEVICE(MELLANOX, 0x101f) },			/* ConnectX-6 LX */
	{ PCI_VDEVICE(MELLANOX, 0x1021) },			/* ConnectX-7 */
	{ PCI_VDEVICE(MELLANOX, 0x1023) },			/* ConnectX-8 */
	{ PCI_VDEVICE(MELLANOX, 0x1025) },			/* ConnectX-9 */
	{ PCI_VDEVICE(MELLANOX, 0x1027) },			/* ConnectX-10 */
	{ PCI_VDEVICE(MELLANOX, 0x2101) },			/* ConnectX-10 NVLink-C2C */
	{ PCI_VDEVICE(MELLANOX, 0xa2d2) },			/* BlueField integrated ConnectX-5 network controller */
	{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},	/* BlueField integrated ConnectX-5 network controller VF */
	{ PCI_VDEVICE(MELLANOX, 0xa2d6) },			/* BlueField-2 integrated ConnectX-6 Dx network controller */
	{ PCI_VDEVICE(MELLANOX, 0xa2dc) },			/* BlueField-3 integrated ConnectX-7 network controller */
	{ PCI_VDEVICE(MELLANOX, 0xa2df) },			/* BlueField-4 integrated ConnectX-8 network controller */
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);

/* SW-reset the device and unload it; uses the devl_locked unload variant,
 * which asserts that the devlink lock is held.
 */
void mlx5_disable_device(struct mlx5_core_dev *dev)
{
	mlx5_error_sw_reset(dev);
	mlx5_unload_one_devl_locked(dev, false);
}

/* Recover the device: unless it is an SF, disable the PCI device and run the
 * slot reset flow (returning -EIO when that does not recover), then reload
 * the device in recovery mode.
 */
int mlx5_recover_device(struct mlx5_core_dev *dev)
{
	if (!mlx5_core_is_sf(dev)) {
		mlx5_pci_disable_device(dev);
		if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED)
			return -EIO;
	}

	return mlx5_load_one_devl_locked(dev, true);
}

static struct pci_driver mlx5_core_driver = {
	.name           = KBUILD_MODNAME,
	.id_table       = mlx5_core_pci_table,
	.probe          = probe_one,
	.remove         = remove_one,
	.suspend        = mlx5_suspend,
	.resume         = mlx5_resume,
	.shutdown	= shutdown,
	.err_handler	= &mlx5_err_handler,
	.sriov_configure   = mlx5_core_sriov_configure,
	.sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix,
	.sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count,
};

/**
 * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if
 *                     mlx5_core is its driver.
 * @pdev: The associated PCI device.
 *
 * Upon successful return the interface state lock stays held so that the
 * caller can use the device safely.
 * The caller must use the returned mlx5 device only for a narrow window
 * and put it back with mlx5_vf_put_core_dev() as soon as it is done.
 *
 * Return: Pointer to the associated mlx5_core_dev or NULL.
 */
struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev)
{
	struct mlx5_core_dev *mdev;

	mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver);
	if (IS_ERR(mdev))
		return NULL;

	mutex_lock(&mdev->intf_state_mutex);
	if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) {
		/* interface is down: drop the lock, nothing to hand out */
		mutex_unlock(&mdev->intf_state_mutex);
		return NULL;
	}

	return mdev;
}
EXPORT_SYMBOL(mlx5_vf_get_core_dev);

/**
 * mlx5_vf_put_core_dev - Put the mlx5 core device back.
 * @mdev: The mlx5 core device.
 *
 * Upon return the interface state lock is unlocked and the caller must not
 * access the mdev any more.
2364 */ 2365 void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev) 2366 { 2367 mutex_unlock(&mdev->intf_state_mutex); 2368 } 2369 EXPORT_SYMBOL(mlx5_vf_put_core_dev); 2370 2371 static void mlx5_core_verify_params(void) 2372 { 2373 if (prof_sel >= ARRAY_SIZE(profile)) { 2374 pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n", 2375 prof_sel, 2376 ARRAY_SIZE(profile) - 1, 2377 MLX5_DEFAULT_PROF); 2378 prof_sel = MLX5_DEFAULT_PROF; 2379 } 2380 } 2381 2382 static int __init mlx5_init(void) 2383 { 2384 int err; 2385 2386 WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME), 2387 "mlx5_core name not in sync with kernel module name"); 2388 2389 get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); 2390 2391 mlx5_core_verify_params(); 2392 mlx5_register_debugfs(); 2393 2394 err = mlx5e_init(); 2395 if (err) 2396 goto err_debug; 2397 2398 err = mlx5_sf_driver_register(); 2399 if (err) 2400 goto err_sf; 2401 2402 err = pci_register_driver(&mlx5_core_driver); 2403 if (err) 2404 goto err_pci; 2405 2406 return 0; 2407 2408 err_pci: 2409 mlx5_sf_driver_unregister(); 2410 err_sf: 2411 mlx5e_cleanup(); 2412 err_debug: 2413 mlx5_unregister_debugfs(); 2414 return err; 2415 } 2416 2417 static void __exit mlx5_cleanup(void) 2418 { 2419 pci_unregister_driver(&mlx5_core_driver); 2420 mlx5_sf_driver_unregister(); 2421 mlx5e_cleanup(); 2422 mlx5_unregister_debugfs(); 2423 } 2424 2425 module_init(mlx5_init); 2426 module_exit(mlx5_cleanup); 2427