1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 #include <linux/highmem.h> 34 #include <linux/module.h> 35 #include <linux/init.h> 36 #include <linux/errno.h> 37 #include <linux/pci.h> 38 #include <linux/dma-mapping.h> 39 #include <linux/slab.h> 40 #include <linux/interrupt.h> 41 #include <linux/delay.h> 42 #include <linux/mlx5/driver.h> 43 #include <linux/mlx5/cq.h> 44 #include <linux/mlx5/qp.h> 45 #include <linux/debugfs.h> 46 #include <linux/kmod.h> 47 #include <linux/mlx5/mlx5_ifc.h> 48 #include <linux/mlx5/vport.h> 49 #include <linux/version.h> 50 #include <net/devlink.h> 51 #include "mlx5_core.h" 52 #include "lib/eq.h" 53 #include "fs_core.h" 54 #include "lib/mpfs.h" 55 #include "eswitch.h" 56 #include "devlink.h" 57 #include "fw_reset.h" 58 #include "lib/mlx5.h" 59 #include "lib/tout.h" 60 #include "fpga/core.h" 61 #include "en_accel/ipsec.h" 62 #include "lib/clock.h" 63 #include "lib/vxlan.h" 64 #include "lib/geneve.h" 65 #include "lib/devcom.h" 66 #include "lib/pci_vsc.h" 67 #include "diag/fw_tracer.h" 68 #include "ecpf.h" 69 #include "lib/hv_vhca.h" 70 #include "diag/rsc_dump.h" 71 #include "sf/vhca_event.h" 72 #include "sf/dev/dev.h" 73 #include "sf/sf.h" 74 #include "mlx5_irq.h" 75 #include "hwmon.h" 76 #include "lag/lag.h" 77 #include "sh_devlink.h" 78 79 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 80 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); 81 MODULE_LICENSE("Dual BSD/GPL"); 82 83 unsigned int mlx5_core_debug_mask; 84 module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644); 85 MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0"); 86 87 static unsigned int prof_sel = MLX5_DEFAULT_PROF; 88 module_param_named(prof_sel, prof_sel, uint, 0444); 89 MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); 90 91 static u32 sw_owner_id[4]; 92 #define MAX_SW_VHCA_ID (BIT(__mlx5_bit_sz(cmd_hca_cap_2, sw_vhca_id)) - 1) 93 static DEFINE_IDA(sw_vhca_ida); 94 95 enum { 96 MLX5_ATOMIC_REQ_MODE_BE = 0x0, 97 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, 98 }; 99 100 #define LOG_MAX_SUPPORTED_QPS 0xff 101 102 static struct mlx5_profile profile[] = { 103 [0] = { 104 .mask = 0, 105 .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, 106 }, 107 [1] = { 108 .mask = MLX5_PROF_MASK_QP_SIZE, 109 .log_max_qp = 12, 110 .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, 111 112 }, 113 [2] = { 114 .mask = MLX5_PROF_MASK_QP_SIZE, 115 .log_max_qp = LOG_MAX_SUPPORTED_QPS, 116 .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, 117 }, 118 [3] = { 119 .mask = MLX5_PROF_MASK_QP_SIZE, 120 .log_max_qp = LOG_MAX_SUPPORTED_QPS, 121 .num_cmd_caches = 0, 122 }, 123 }; 124 125 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, 126 u32 warn_time_mili, const char *init_state) 127 { 128 unsigned long warn = jiffies + msecs_to_jiffies(warn_time_mili); 129 unsigned long end = jiffies + msecs_to_jiffies(max_wait_mili); 130 u32 fw_initializing; 131 132 do { 133 fw_initializing = ioread32be(&dev->iseg->initializing); 134 if (!(fw_initializing >> 31)) 135 break; 136 if (time_after(jiffies, end)) { 137 mlx5_core_err(dev, "Firmware over %u MS in %s state, aborting\n", 138 max_wait_mili, init_state); 139 return -ETIMEDOUT; 140 } 141 if (test_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state)) { 142 mlx5_core_warn(dev, "device is being removed, stop waiting for FW %s\n", 143 init_state); 144 return -ENODEV; 145 } 146 if (warn_time_mili && time_after(jiffies, warn)) { 147 mlx5_core_warn(dev, "Waiting for FW %s, timeout abort in %ds (0x%x)\n", 148 init_state, jiffies_to_msecs(end - warn) / 1000, 149 fw_initializing); 150 warn = jiffies + msecs_to_jiffies(warn_time_mili); 151 } 152 msleep(mlx5_tout_ms(dev, FW_PRE_INIT_WAIT)); 153 } while (true); 154 155 return 0; 156 } 157 158 static void mlx5_set_driver_version(struct mlx5_core_dev *dev) 159 { 160 int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in, 161 driver_version); 162 u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {}; 163 char *string; 164 165 if (!MLX5_CAP_GEN(dev, driver_version)) 166 return; 167 168 string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version); 169 170 snprintf(string, driver_ver_sz, "Linux,%s,%u.%u.%u", 171 KBUILD_MODNAME, LINUX_VERSION_MAJOR, 172 LINUX_VERSION_PATCHLEVEL, LINUX_VERSION_SUBLEVEL); 173 174 /*Send the command*/ 175 MLX5_SET(set_driver_version_in, in, opcode, 176 MLX5_CMD_OP_SET_DRIVER_VERSION); 177 178 mlx5_cmd_exec_in(dev, set_driver_version, in); 179 } 180 181 static int set_dma_caps(struct pci_dev *pdev) 182 { 183 int err; 184 185 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); 186 if (err) { 187 dev_warn(&pdev->dev, "Warning: couldn't set 64-bit PCI DMA mask\n"); 188 err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); 189 if (err) { 190 dev_err(&pdev->dev, "Can't set PCI DMA mask, aborting\n"); 191 return err; 192 } 193 } 194 195 dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024); 196 return err; 197 } 198 199 static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) 200 { 201 struct pci_dev *pdev = dev->pdev; 202 int err = 0; 203 204 mutex_lock(&dev->pci_status_mutex); 205 if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) { 206 err = pci_enable_device(pdev); 207 if (!err) 208 dev->pci_status = MLX5_PCI_STATUS_ENABLED; 209 } 210 mutex_unlock(&dev->pci_status_mutex); 211 212 return err; 213 } 214 215 static void mlx5_pci_disable_device(struct mlx5_core_dev *dev) 216 { 217 struct pci_dev *pdev = dev->pdev; 218 219 mutex_lock(&dev->pci_status_mutex); 220 if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) { 221 pci_disable_device(pdev); 222 dev->pci_status = MLX5_PCI_STATUS_DISABLED; 223 } 224 mutex_unlock(&dev->pci_status_mutex); 225 } 226 227 static int request_bar(struct pci_dev *pdev) 228 { 229 int err = 0; 230 231 if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { 232 dev_err(&pdev->dev, "Missing registers BAR, aborting\n"); 233 return -ENODEV; 234 } 235 236 err = pci_request_regions(pdev, KBUILD_MODNAME); 237 if (err) 238 dev_err(&pdev->dev, "Couldn't get PCI resources, aborting\n"); 239 240 return err; 241 } 242 243 static void release_bar(struct pci_dev *pdev) 244 { 245 pci_release_regions(pdev); 246 } 247 248 struct mlx5_reg_host_endianness { 249 u8 he; 250 u8 rsvd[15]; 251 }; 252 253 static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) 254 { 255 switch (size) { 256 case 128: 257 return 0; 258 case 256: 259 return 1; 260 case 512: 261 return 2; 262 case 1024: 263 return 3; 264 case 2048: 265 return 4; 266 case 4096: 267 return 5; 268 default: 269 mlx5_core_warn(dev, "invalid pkey table size %d\n", size); 270 return 0; 271 } 272 } 273 274 void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *dev, struct net_device *netdev) 275 { 276 mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); 277 dev->mlx5e_res.uplink_netdev = netdev; 278 mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, 279 netdev); 280 mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); 281 } 282 283 void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) 284 { 285 mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); 286 mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, 287 dev->mlx5e_res.uplink_netdev); 288 mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); 289 } 290 EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); 291 292 void mlx5_core_mp_event_replay(struct mlx5_core_dev *dev, u32 event, void *data) 293 { 294 mlx5_blocking_notifier_call_chain(dev, event, data); 295 } 296 EXPORT_SYMBOL(mlx5_core_mp_event_replay); 297 298 int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, 299 enum mlx5_cap_mode cap_mode) 300 { 301 u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)]; 302 int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); 303 void *out, *hca_caps; 304 u16 opmod = (cap_type << 1) | (cap_mode & 0x01); 305 int err; 306 307 if (WARN_ON(!dev->caps.hca[cap_type])) 308 /* this cap_type must be added to mlx5_hca_caps_alloc() */ 309 return -EINVAL; 310 311 memset(in, 0, sizeof(in)); 312 out = kzalloc(out_sz, GFP_KERNEL); 313 if (!out) 314 return -ENOMEM; 315 316 MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); 317 MLX5_SET(query_hca_cap_in, in, op_mod, opmod); 318 err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); 319 if (err) { 320 mlx5_core_warn(dev, 321 "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", 322 cap_type, cap_mode, err); 323 goto query_ex; 324 } 325 326 hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability); 327 328 switch (cap_mode) { 329 case HCA_CAP_OPMOD_GET_MAX: 330 memcpy(dev->caps.hca[cap_type]->max, hca_caps, 331 MLX5_UN_SZ_BYTES(hca_cap_union)); 332 break; 333 case HCA_CAP_OPMOD_GET_CUR: 334 memcpy(dev->caps.hca[cap_type]->cur, hca_caps, 335 MLX5_UN_SZ_BYTES(hca_cap_union)); 336 break; 337 default: 338 mlx5_core_warn(dev, 339 "Tried to query dev cap type(%x) with wrong opmode(%x)\n", 340 cap_type, cap_mode); 341 err = -EINVAL; 342 break; 343 } 344 query_ex: 345 kfree(out); 346 return err; 347 } 348 349 int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) 350 { 351 int ret; 352 353 ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR); 354 if (ret) 355 return ret; 356 return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX); 357 } 358 359 static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod) 360 { 361 MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); 362 MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); 363 return mlx5_cmd_exec_in(dev, set_hca_cap, in); 364 } 365 366 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx) 367 { 368 void *set_hca_cap; 369 int req_endianness; 370 int err; 371 372 if (!MLX5_CAP_GEN(dev, atomic)) 373 return 0; 374 375 err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC); 376 if (err) 377 return err; 378 379 req_endianness = 380 MLX5_CAP_ATOMIC(dev, 381 supported_atomic_req_8B_endianness_mode_1); 382 383 if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS) 384 return 0; 385 386 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 387 388 /* Set requestor to host endianness */ 389 MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode, 390 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS); 391 392 return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC); 393 } 394 395 static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx) 396 { 397 bool do_set = false, mem_page_fault = false; 398 void *set_hca_cap; 399 int err; 400 401 if (!IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) || 402 !MLX5_CAP_GEN(dev, pg)) 403 return 0; 404 405 err = mlx5_core_get_caps(dev, MLX5_CAP_ODP); 406 if (err) 407 return err; 408 409 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 410 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ODP]->cur, 411 MLX5_ST_SZ_BYTES(odp_cap)); 412 413 /* For best performance, enable memory scheme ODP only when 414 * it has page prefetch enabled. 415 */ 416 if (MLX5_CAP_ODP_MAX(dev, mem_page_fault) && 417 MLX5_CAP_ODP_MAX(dev, memory_page_fault_scheme_cap.page_prefetch)) { 418 mem_page_fault = true; 419 do_set = true; 420 MLX5_SET(odp_cap, set_hca_cap, mem_page_fault, mem_page_fault); 421 goto set; 422 } 423 424 #define ODP_CAP_SET_MAX(dev, field) \ 425 do { \ 426 u32 _res = MLX5_CAP_ODP_MAX(dev, field); \ 427 if (_res) { \ 428 do_set = true; \ 429 MLX5_SET(odp_cap, set_hca_cap, field, _res); \ 430 } \ 431 } while (0) 432 433 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.ud_odp_caps.srq_receive); 434 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.rc_odp_caps.srq_receive); 435 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.srq_receive); 436 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.send); 437 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.receive); 438 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.write); 439 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.read); 440 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.xrc_odp_caps.atomic); 441 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.srq_receive); 442 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.send); 443 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.receive); 444 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.write); 445 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.read); 446 ODP_CAP_SET_MAX(dev, transport_page_fault_scheme_cap.dc_odp_caps.atomic); 447 448 set: 449 if (do_set) 450 err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP); 451 452 mlx5_core_dbg(dev, "Using ODP %s scheme\n", 453 mem_page_fault ? "memory" : "transport"); 454 return err; 455 } 456 457 static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) 458 { 459 struct devlink *devlink = priv_to_devlink(dev); 460 union devlink_param_value val; 461 int err; 462 463 err = devl_param_driverinit_value_get(devlink, 464 DEVLINK_PARAM_GENERIC_ID_MAX_MACS, 465 &val); 466 if (!err) 467 return val.vu32; 468 mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); 469 return err; 470 } 471 472 bool mlx5_is_roce_on(struct mlx5_core_dev *dev) 473 { 474 struct devlink *devlink = priv_to_devlink(dev); 475 union devlink_param_value val; 476 int err; 477 478 err = devl_param_driverinit_value_get(devlink, 479 DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, 480 &val); 481 482 if (!err) 483 return val.vbool; 484 485 mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); 486 return MLX5_CAP_GEN(dev, roce); 487 } 488 EXPORT_SYMBOL(mlx5_is_roce_on); 489 490 static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) 491 { 492 bool do_set = false; 493 void *set_hca_cap; 494 int err; 495 496 if (!MLX5_CAP_GEN_MAX(dev, hca_cap_2)) 497 return 0; 498 499 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL_2); 500 if (err) 501 return err; 502 503 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, 504 capability); 505 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL_2]->cur, 506 MLX5_ST_SZ_BYTES(cmd_hca_cap_2)); 507 508 if (MLX5_CAP_GEN_2_MAX(dev, sw_vhca_id_valid) && 509 dev->priv.sw_vhca_id > 0) { 510 MLX5_SET(cmd_hca_cap_2, set_hca_cap, sw_vhca_id_valid, 1); 511 do_set = true; 512 } 513 514 if (MLX5_CAP_GEN_2_MAX(dev, lag_per_mp_group)) { 515 MLX5_SET(cmd_hca_cap_2, set_hca_cap, lag_per_mp_group, 1); 516 do_set = true; 517 } 518 519 /* some FW versions that support querying MLX5_CAP_GENERAL_2 520 * capabilities but don't support setting them. 521 * Skip unnecessary update to hca_cap_2 when no changes were introduced 522 */ 523 return do_set ? set_caps(dev, set_ctx, MLX5_CAP_GENERAL_2) : 0; 524 } 525 526 static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) 527 { 528 struct mlx5_profile *prof = &dev->profile; 529 void *set_hca_cap; 530 int err; 531 532 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); 533 if (err) 534 return err; 535 536 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, 537 capability); 538 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_GENERAL]->cur, 539 MLX5_ST_SZ_BYTES(cmd_hca_cap)); 540 541 mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n", 542 mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)), 543 128); 544 /* we limit the size of the pkey table to 128 entries for now */ 545 MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, 546 to_fw_pkey_sz(dev, 128)); 547 548 /* Check log_max_qp from HCA caps to set in current profile */ 549 if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { 550 prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp)); 551 } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { 552 mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", 553 prof->log_max_qp, 554 MLX5_CAP_GEN_MAX(dev, log_max_qp)); 555 prof->log_max_qp = MLX5_CAP_GEN_MAX(dev, log_max_qp); 556 } 557 if (prof->mask & MLX5_PROF_MASK_QP_SIZE) 558 MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, 559 prof->log_max_qp); 560 561 /* disable cmdif checksum */ 562 MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0); 563 564 /* Enable 4K UAR only when HCA supports it and page size is bigger 565 * than 4K. 566 */ 567 if (MLX5_CAP_GEN_MAX(dev, uar_4k) && PAGE_SIZE > 4096) 568 MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1); 569 570 MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12); 571 572 if (MLX5_CAP_GEN_MAX(dev, cache_line_128byte)) 573 MLX5_SET(cmd_hca_cap, 574 set_hca_cap, 575 cache_line_128byte, 576 cache_line_size() >= 128 ? 1 : 0); 577 578 if (MLX5_CAP_GEN_MAX(dev, dct)) 579 MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); 580 581 if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_event)) 582 MLX5_SET(cmd_hca_cap, set_hca_cap, pci_sync_for_fw_update_event, 1); 583 if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload)) 584 MLX5_SET(cmd_hca_cap, set_hca_cap, 585 pci_sync_for_fw_update_with_driver_unload, 1); 586 if (MLX5_CAP_GEN_MAX(dev, pcie_reset_using_hotreset_method)) 587 MLX5_SET(cmd_hca_cap, set_hca_cap, 588 pcie_reset_using_hotreset_method, 1); 589 590 if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports)) 591 MLX5_SET(cmd_hca_cap, 592 set_hca_cap, 593 num_vhca_ports, 594 MLX5_CAP_GEN_MAX(dev, num_vhca_ports)); 595 596 if (MLX5_CAP_GEN_MAX(dev, release_all_pages)) 597 MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1); 598 599 if (MLX5_CAP_GEN_MAX(dev, icm_mng_function_id_mode)) 600 MLX5_SET(cmd_hca_cap, set_hca_cap, icm_mng_function_id_mode, 1); 601 602 if (MLX5_CAP_GEN_MAX(dev, mkey_by_name)) 603 MLX5_SET(cmd_hca_cap, set_hca_cap, mkey_by_name, 1); 604 605 mlx5_vhca_state_cap_handle(dev, set_hca_cap); 606 607 if (MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)) 608 MLX5_SET(cmd_hca_cap, set_hca_cap, num_total_dynamic_vf_msix, 609 MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); 610 611 if (MLX5_CAP_GEN(dev, roce_rw_supported) && MLX5_CAP_GEN_MAX(dev, roce)) 612 MLX5_SET(cmd_hca_cap, set_hca_cap, roce, 613 mlx5_is_roce_on(dev)); 614 615 if (MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list)) { 616 int max_uc_list = max_uc_list_get_devlink_param(dev); 617 618 if (max_uc_list > 0) 619 MLX5_SET(cmd_hca_cap, set_hca_cap, 620 log_max_current_uc_list, ilog2(max_uc_list)); 621 } 622 623 /* enable absolute native port num */ 624 if (MLX5_CAP_GEN_MAX(dev, abs_native_port_num)) 625 MLX5_SET(cmd_hca_cap, set_hca_cap, abs_native_port_num, 1); 626 627 return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); 628 } 629 630 /* Cached MLX5_CAP_GEN(dev, roce) can be out of sync this early in the 631 * boot process. 632 * In case RoCE cap is writable in FW and user/devlink requested to change the 633 * cap, we are yet to query the final state of the above cap. 634 * Hence, the need for this function. 635 * 636 * Returns 637 * True: 638 * 1) RoCE cap is read only in FW and already disabled 639 * OR: 640 * 2) RoCE cap is writable in FW and user/devlink requested it off. 641 * 642 * In any other case, return False. 643 */ 644 static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) 645 { 646 return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || 647 (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); 648 } 649 650 static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx) 651 { 652 void *set_hca_cap; 653 int err; 654 655 if (is_roce_fw_disabled(dev)) 656 return 0; 657 658 err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE); 659 if (err) 660 return err; 661 662 if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) || 663 !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port)) 664 return 0; 665 666 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 667 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_ROCE]->cur, 668 MLX5_ST_SZ_BYTES(roce_cap)); 669 MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1); 670 671 if (MLX5_CAP_ROCE_MAX(dev, qp_ooo_transmit_default)) 672 MLX5_SET(roce_cap, set_hca_cap, qp_ooo_transmit_default, 1); 673 674 err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE); 675 return err; 676 } 677 678 static int handle_hca_cap_port_selection(struct mlx5_core_dev *dev, 679 void *set_ctx) 680 { 681 void *set_hca_cap; 682 int err; 683 684 if (!MLX5_CAP_GEN(dev, port_selection_cap)) 685 return 0; 686 687 err = mlx5_core_get_caps(dev, MLX5_CAP_PORT_SELECTION); 688 if (err) 689 return err; 690 691 if (MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass) || 692 !MLX5_CAP_PORT_SELECTION_MAX(dev, port_select_flow_table_bypass)) 693 return 0; 694 695 set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); 696 memcpy(set_hca_cap, dev->caps.hca[MLX5_CAP_PORT_SELECTION]->cur, 697 MLX5_ST_SZ_BYTES(port_selection_cap)); 698 MLX5_SET(port_selection_cap, set_hca_cap, port_select_flow_table_bypass, 1); 699 700 err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_PORT_SELECTION); 701 702 return err; 703 } 704 705 static int set_hca_cap(struct mlx5_core_dev *dev) 706 { 707 int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); 708 void *set_ctx; 709 int err; 710 711 set_ctx = kzalloc(set_sz, GFP_KERNEL); 712 if (!set_ctx) 713 return -ENOMEM; 714 715 err = handle_hca_cap(dev, set_ctx); 716 if (err) { 717 mlx5_core_err(dev, "handle_hca_cap failed\n"); 718 goto out; 719 } 720 721 memset(set_ctx, 0, set_sz); 722 err = handle_hca_cap_atomic(dev, set_ctx); 723 if (err) { 724 mlx5_core_err(dev, "handle_hca_cap_atomic failed\n"); 725 goto out; 726 } 727 728 memset(set_ctx, 0, set_sz); 729 err = handle_hca_cap_odp(dev, set_ctx); 730 if (err) { 731 mlx5_core_err(dev, "handle_hca_cap_odp failed\n"); 732 goto out; 733 } 734 735 memset(set_ctx, 0, set_sz); 736 err = handle_hca_cap_roce(dev, set_ctx); 737 if (err) { 738 mlx5_core_err(dev, "handle_hca_cap_roce failed\n"); 739 goto out; 740 } 741 742 memset(set_ctx, 0, set_sz); 743 err = handle_hca_cap_2(dev, set_ctx); 744 if (err) { 745 mlx5_core_err(dev, "handle_hca_cap_2 failed\n"); 746 goto out; 747 } 748 749 memset(set_ctx, 0, set_sz); 750 err = handle_hca_cap_port_selection(dev, set_ctx); 751 if (err) { 752 mlx5_core_err(dev, "handle_hca_cap_port_selection failed\n"); 753 goto out; 754 } 755 756 out: 757 kfree(set_ctx); 758 return err; 759 } 760 761 static int set_hca_ctrl(struct mlx5_core_dev *dev) 762 { 763 struct mlx5_reg_host_endianness he_in; 764 struct mlx5_reg_host_endianness he_out; 765 int err; 766 767 if (!mlx5_core_is_pf(dev)) 768 return 0; 769 770 memset(&he_in, 0, sizeof(he_in)); 771 he_in.he = MLX5_SET_HOST_ENDIANNESS; 772 err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in), 773 &he_out, sizeof(he_out), 774 MLX5_REG_HOST_ENDIANNESS, 0, 1); 775 return err; 776 } 777 778 static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev) 779 { 780 int ret = 0; 781 782 /* Disable local_lb by default */ 783 if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) 784 ret = mlx5_nic_vport_update_local_lb(dev, false); 785 786 return ret; 787 } 788 789 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) 790 { 791 u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {}; 792 793 MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); 794 MLX5_SET(enable_hca_in, in, function_id, func_id); 795 MLX5_SET(enable_hca_in, in, embedded_cpu_function, 796 dev->caps.embedded_cpu); 797 return mlx5_cmd_exec_in(dev, enable_hca, in); 798 } 799 800 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) 801 { 802 u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {}; 803 804 MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); 805 MLX5_SET(disable_hca_in, in, function_id, func_id); 806 MLX5_SET(enable_hca_in, in, embedded_cpu_function, 807 dev->caps.embedded_cpu); 808 return mlx5_cmd_exec_in(dev, disable_hca, in); 809 } 810 811 static int mlx5_core_set_issi(struct mlx5_core_dev *dev) 812 { 813 u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {}; 814 u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {}; 815 u32 sup_issi; 816 int err; 817 818 MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); 819 err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out); 820 if (err) { 821 u32 syndrome = MLX5_GET(query_issi_out, query_out, syndrome); 822 u8 status = MLX5_GET(query_issi_out, query_out, status); 823 824 if (!status || syndrome == MLX5_DRIVER_SYND) { 825 mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n", 826 err, status, syndrome); 827 return err; 828 } 829 830 mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n"); 831 dev->issi = 0; 832 return 0; 833 } 834 835 sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); 836 837 if (sup_issi & (1 << 1)) { 838 u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {}; 839 840 MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); 841 MLX5_SET(set_issi_in, set_in, current_issi, 1); 842 err = mlx5_cmd_exec_in(dev, set_issi, set_in); 843 if (err) { 844 mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n", 845 err); 846 return err; 847 } 848 849 dev->issi = 1; 850 851 return 0; 852 } else if (sup_issi & (1 << 0) || !sup_issi) { 853 return 0; 854 } 855 856 return -EOPNOTSUPP; 857 } 858 859 static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, 860 const struct pci_device_id *id) 861 { 862 int err = 0; 863 864 mutex_init(&dev->pci_status_mutex); 865 pci_set_drvdata(dev->pdev, dev); 866 867 dev->bar_addr = pci_resource_start(pdev, 0); 868 869 err = mlx5_pci_enable_device(dev); 870 if (err) { 871 mlx5_core_err(dev, "Cannot enable PCI device, aborting\n"); 872 return err; 873 } 874 875 err = request_bar(pdev); 876 if (err) { 877 mlx5_core_err(dev, "error requesting BARs, aborting\n"); 878 goto err_disable; 879 } 880 881 pci_set_master(pdev); 882 883 err = set_dma_caps(pdev); 884 if (err) { 885 mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n"); 886 goto err_clr_master; 887 } 888 889 if (pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32) && 890 pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64) && 891 pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP128)) 892 mlx5_core_dbg(dev, "Enabling pci atomics failed\n"); 893 894 dev->iseg = ioremap(dev->bar_addr, sizeof(*dev->iseg)); 895 if (!dev->iseg) { 896 err = -ENOMEM; 897 mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n"); 898 goto err_clr_master; 899 } 900 901 mlx5_pci_vsc_init(dev); 902 903 pci_enable_ptm(pdev); 904 905 return 0; 906 907 err_clr_master: 908 release_bar(dev->pdev); 909 err_disable: 910 mlx5_pci_disable_device(dev); 911 return err; 912 } 913 914 static void mlx5_pci_close(struct mlx5_core_dev *dev) 915 { 916 /* health work might still be active, and it needs pci bar in 917 * order to know the NIC state. Therefore, drain the health WQ 918 * before removing the pci bars 919 */ 920 mlx5_drain_health_wq(dev); 921 pci_disable_ptm(dev->pdev); 922 iounmap(dev->iseg); 923 release_bar(dev->pdev); 924 mlx5_pci_disable_device(dev); 925 } 926 927 static int mlx5_init_once(struct mlx5_core_dev *dev) 928 { 929 int err; 930 931 dev->priv.devc = mlx5_devcom_register_device(dev); 932 if (!dev->priv.devc) 933 mlx5_core_warn(dev, "failed to register devcom device\n"); 934 935 err = mlx5_query_board_id(dev); 936 if (err) { 937 mlx5_core_err(dev, "query board id failed\n"); 938 goto err_devcom; 939 } 940 941 err = mlx5_irq_table_init(dev); 942 if (err) { 943 mlx5_core_err(dev, "failed to initialize irq table\n"); 944 goto err_devcom; 945 } 946 947 err = mlx5_eq_table_init(dev); 948 if (err) { 949 mlx5_core_err(dev, "failed to initialize eq\n"); 950 goto err_irq_cleanup; 951 } 952 953 err = mlx5_fw_reset_init(dev); 954 if (err) { 955 mlx5_core_err(dev, "failed to initialize fw reset events\n"); 956 goto err_eq_cleanup; 957 } 958 959 mlx5_cq_debugfs_init(dev); 960 961 mlx5_init_reserved_gids(dev); 962 963 err = mlx5_init_clock(dev); 964 if (err) { 965 mlx5_core_err(dev, "failed to initialize hardware clock\n"); 966 goto err_tables_cleanup; 967 } 968 969 dev->vxlan = mlx5_vxlan_create(dev); 970 dev->geneve = mlx5_geneve_create(dev); 971 972 err = mlx5_init_rl_table(dev); 973 if (err) { 974 mlx5_core_err(dev, "Failed to init rate limiting\n"); 975 goto err_clock_cleanup; 976 } 977 978 err = mlx5_mpfs_init(dev); 979 if (err) { 980 mlx5_core_err(dev, "Failed to init l2 table %d\n", err); 981 goto err_rl_cleanup; 982 } 983 984 err = mlx5_sriov_init(dev); 985 if (err) { 986 mlx5_core_err(dev, "Failed to init sriov %d\n", err); 987 goto err_mpfs_cleanup; 988 } 989 990 err = mlx5_eswitch_init(dev); 991 if (err) { 992 mlx5_core_err(dev, "Failed to init eswitch %d\n", err); 993 goto err_sriov_cleanup; 994 } 995 mlx5_pages_by_func_type_debugfs_init(dev); 996 997 err = mlx5_fpga_init(dev); 998 if (err) { 999 mlx5_core_err(dev, "Failed to init fpga device %d\n", err); 1000 goto err_page_debugfs_cleanup; 1001 } 1002 1003 err = mlx5_vhca_event_init(dev); 1004 if (err) { 1005 mlx5_core_err(dev, "Failed to init vhca event notifier %d\n", err); 1006 goto err_fpga_cleanup; 1007 } 1008 1009 err = mlx5_sf_hw_table_init(dev); 1010 if (err) { 1011 mlx5_core_err(dev, "Failed to init SF HW table %d\n", err); 1012 goto err_sf_hw_table_cleanup; 1013 } 1014 1015 err = mlx5_sf_table_init(dev); 1016 if (err) { 1017 mlx5_core_err(dev, "Failed to init SF table %d\n", err); 1018 goto err_sf_table_cleanup; 1019 } 1020 1021 err = mlx5_fs_core_alloc(dev); 1022 if (err) { 1023 mlx5_core_err(dev, "Failed to alloc flow steering\n"); 1024 goto err_fs; 1025 } 1026 1027 dev->dm = mlx5_dm_create(dev); 1028 dev->st = mlx5_st_create(dev); 1029 dev->tracer = mlx5_fw_tracer_create(dev); 1030 dev->hv_vhca = mlx5_hv_vhca_create(dev); 1031 dev->rsc_dump = mlx5_rsc_dump_create(dev); 1032 1033 return 0; 1034 1035 err_fs: 1036 mlx5_sf_table_cleanup(dev); 1037 err_sf_table_cleanup: 1038 mlx5_sf_hw_table_cleanup(dev); 1039 err_sf_hw_table_cleanup: 1040 mlx5_vhca_event_cleanup(dev); 1041 err_fpga_cleanup: 1042 mlx5_fpga_cleanup(dev); 1043 err_page_debugfs_cleanup: 1044 mlx5_pages_by_func_type_debugfs_cleanup(dev); 1045 mlx5_eswitch_cleanup(dev->priv.eswitch); 1046 err_sriov_cleanup: 1047 mlx5_sriov_cleanup(dev); 1048 err_mpfs_cleanup: 1049 mlx5_mpfs_cleanup(dev); 1050 err_rl_cleanup: 1051 mlx5_cleanup_rl_table(dev); 1052 err_clock_cleanup: 1053 mlx5_geneve_destroy(dev->geneve); 1054 mlx5_vxlan_destroy(dev->vxlan); 1055 mlx5_cleanup_clock(dev); 1056 err_tables_cleanup: 1057 mlx5_cleanup_reserved_gids(dev); 1058 mlx5_cq_debugfs_cleanup(dev); 1059 mlx5_fw_reset_cleanup(dev); 1060 err_eq_cleanup: 1061 mlx5_eq_table_cleanup(dev); 1062 err_irq_cleanup: 1063 mlx5_irq_table_cleanup(dev); 1064 err_devcom: 1065 mlx5_devcom_unregister_device(dev->priv.devc); 1066 1067 return err; 1068 } 1069 1070 static void mlx5_cleanup_once(struct mlx5_core_dev *dev) 1071 { 1072 mlx5_rsc_dump_destroy(dev); 1073 mlx5_hv_vhca_destroy(dev->hv_vhca); 1074 mlx5_fw_tracer_destroy(dev->tracer); 1075 mlx5_st_destroy(dev); 1076 mlx5_dm_cleanup(dev); 1077 mlx5_fs_core_free(dev); 1078 mlx5_sf_table_cleanup(dev); 1079 mlx5_sf_hw_table_cleanup(dev); 1080 mlx5_vhca_event_cleanup(dev); 1081 mlx5_fpga_cleanup(dev); 1082 mlx5_pages_by_func_type_debugfs_cleanup(dev); 1083 mlx5_eswitch_cleanup(dev->priv.eswitch); 1084 mlx5_sriov_cleanup(dev); 1085 mlx5_mpfs_cleanup(dev); 1086 mlx5_cleanup_rl_table(dev); 1087 mlx5_geneve_destroy(dev->geneve); 1088 mlx5_vxlan_destroy(dev->vxlan); 1089 mlx5_cleanup_clock(dev); 1090 mlx5_cleanup_reserved_gids(dev); 1091 mlx5_cq_debugfs_cleanup(dev); 1092 mlx5_fw_reset_cleanup(dev); 1093 mlx5_eq_table_cleanup(dev); 1094 mlx5_irq_table_cleanup(dev); 1095 mlx5_devcom_unregister_device(dev->priv.devc); 1096 } 1097 1098 static int mlx5_function_enable(struct mlx5_core_dev *dev, bool boot, u64 timeout) 1099 { 1100 int err; 1101 1102 mlx5_core_info(dev, "firmware version: %d.%d.%d\n", fw_rev_maj(dev), 1103 fw_rev_min(dev), fw_rev_sub(dev)); 1104 1105 /* Only PFs hold the relevant PCIe information for this query */ 1106 if (mlx5_core_is_pf(dev)) 1107 pcie_print_link_status(dev->pdev); 1108 1109 /* wait for firmware to accept initialization segments configurations 1110 */ 1111 err = wait_fw_init(dev, timeout, 1112 mlx5_tout_ms(dev, FW_PRE_INIT_WARN_MESSAGE_INTERVAL), 1113 "pre-initializing"); 1114 if (err) 1115 return err; 1116 1117 err = mlx5_cmd_enable(dev); 1118 if (err) { 1119 mlx5_core_err(dev, "Failed initializing command interface, aborting\n"); 1120 return err; 1121 } 1122 1123 mlx5_tout_query_iseg(dev); 1124 1125 err = wait_fw_init(dev, mlx5_tout_ms(dev, FW_INIT), 0, "initializing"); 1126 if (err) 1127 goto err_cmd_cleanup; 1128 1129 dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev); 1130 mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_UP); 1131 1132 err = mlx5_core_enable_hca(dev, 0); 1133 if (err) { 1134 mlx5_core_err(dev, "enable hca failed\n"); 1135 goto err_cmd_cleanup; 1136 } 1137 1138 mlx5_start_health_poll(dev); 1139 1140 err = mlx5_core_set_issi(dev); 1141 if (err) { 1142 mlx5_core_err(dev, "failed to set issi\n"); 1143 goto stop_health_poll; 1144 } 1145 1146 err = mlx5_satisfy_startup_pages(dev, 1); 1147 if (err) { 1148 mlx5_core_err(dev, "failed to allocate boot pages\n"); 1149 goto stop_health_poll; 1150 } 1151 1152 err = mlx5_tout_query_dtor(dev); 1153 if (err) { 1154 mlx5_core_err(dev, "failed to read dtor\n"); 1155 goto reclaim_boot_pages; 1156 } 1157 1158 return 0; 1159 1160 reclaim_boot_pages: 1161 mlx5_reclaim_startup_pages(dev); 1162 stop_health_poll: 1163 mlx5_stop_health_poll(dev, boot); 1164 mlx5_core_disable_hca(dev, 0); 1165 err_cmd_cleanup: 1166 mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); 1167 mlx5_cmd_disable(dev); 1168 1169 return err; 1170 } 1171 1172 static void mlx5_function_disable(struct mlx5_core_dev *dev, bool boot) 1173 { 1174 mlx5_reclaim_startup_pages(dev); 1175 mlx5_stop_health_poll(dev, boot); 1176 mlx5_core_disable_hca(dev, 0); 1177 mlx5_cmd_set_state(dev, MLX5_CMDIF_STATE_DOWN); 1178 mlx5_cmd_disable(dev); 1179 } 1180 1181 static int mlx5_function_open(struct mlx5_core_dev *dev) 1182 { 1183 int err; 1184 1185 err = set_hca_ctrl(dev); 1186 if (err) { 1187 mlx5_core_err(dev, "set_hca_ctrl failed\n"); 1188 return err; 1189 } 1190 1191 err = set_hca_cap(dev); 1192 if (err) { 1193 mlx5_core_err(dev, "set_hca_cap failed\n"); 1194 return err; 1195 } 1196 1197 err = mlx5_satisfy_startup_pages(dev, 0); 1198 if (err) { 1199 mlx5_core_err(dev, "failed to allocate init pages\n"); 1200 return err; 1201 } 1202 1203 err = mlx5_cmd_init_hca(dev, sw_owner_id); 1204 if (err) { 1205 mlx5_core_err(dev, "init hca failed\n"); 1206 return err; 1207 } 1208 1209 mlx5_set_driver_version(dev); 1210 1211 err = mlx5_query_hca_caps(dev); 1212 if (err) { 1213 mlx5_core_err(dev, "query hca failed\n"); 1214 return err; 1215 } 1216 mlx5_start_health_fw_log_up(dev); 1217 return 0; 1218 } 1219 1220 static int mlx5_function_close(struct mlx5_core_dev *dev) 1221 { 1222 int err; 1223 1224 err = mlx5_cmd_teardown_hca(dev); 1225 if (err) { 1226 mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n"); 1227 return err; 1228 } 1229 1230 return 0; 1231 } 1232 1233 static int mlx5_function_setup(struct mlx5_core_dev *dev, bool boot, u64 timeout) 1234 { 1235 int err; 1236 1237 err = mlx5_function_enable(dev, boot, timeout); 1238 if (err) 1239 return err; 1240 1241 err = mlx5_function_open(dev); 1242 if (err) 1243 mlx5_function_disable(dev, boot); 1244 return err; 1245 } 1246 1247 static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) 1248 { 1249 int err = mlx5_function_close(dev); 1250 1251 if (!err) 1252 mlx5_function_disable(dev, boot); 1253 else 1254 mlx5_stop_health_poll(dev, boot); 1255 1256 return err; 1257 } 1258 1259 static int mlx5_load(struct mlx5_core_dev *dev) 1260 { 1261 int err; 1262 1263 err = mlx5_alloc_bfreg(dev, &dev->priv.bfreg, false, false); 1264 if (err) { 1265 mlx5_core_err(dev, "Failed allocating bfreg, %d\n", err); 1266 return err; 1267 } 1268 1269 mlx5_events_start(dev); 1270 mlx5_pagealloc_start(dev); 1271 1272 err = mlx5_irq_table_create(dev); 1273 if (err) { 1274 mlx5_core_err(dev, "Failed to alloc IRQs\n"); 1275 goto err_irq_table; 1276 } 1277 1278 err = mlx5_eq_table_create(dev); 1279 if (err) { 1280 mlx5_core_err(dev, "Failed to create EQs\n"); 1281 goto err_eq_table; 1282 } 1283 1284 mlx5_clock_load(dev); 1285 1286 err = mlx5_fw_tracer_init(dev->tracer); 1287 if (err) { 1288 mlx5_core_err(dev, "Failed to init FW tracer %d\n", err); 1289 mlx5_fw_tracer_destroy(dev->tracer); 1290 dev->tracer = NULL; 1291 } 1292 1293 mlx5_fw_reset_events_start(dev); 1294 mlx5_hv_vhca_init(dev->hv_vhca); 1295 1296 err = mlx5_rsc_dump_init(dev); 1297 if (err) { 1298 mlx5_core_err(dev, "Failed to init Resource dump %d\n", err); 1299 mlx5_rsc_dump_destroy(dev); 1300 dev->rsc_dump = NULL; 1301 } 1302 1303 err = mlx5_fpga_device_start(dev); 1304 if (err) { 1305 mlx5_core_err(dev, "fpga device start failed %d\n", err); 1306 goto err_fpga_start; 1307 } 1308 1309 err = mlx5_fs_core_init(dev); 1310 if (err) { 1311 mlx5_core_err(dev, "Failed to init flow steering\n"); 1312 goto err_fs; 1313 } 1314 1315 err = mlx5_core_set_hca_defaults(dev); 1316 if (err) { 1317 mlx5_core_err(dev, "Failed to set hca defaults\n"); 1318 goto err_set_hca; 1319 } 1320 1321 mlx5_vhca_event_start(dev); 1322 1323 err = mlx5_ec_init(dev); 1324 if (err) { 1325 mlx5_core_err(dev, "Failed to init embedded CPU\n"); 1326 goto err_ec; 1327 } 1328 1329 mlx5_lag_add_mdev(dev); 1330 err = mlx5_sriov_attach(dev); 1331 if (err) { 1332 mlx5_core_err(dev, "sriov init failed %d\n", err); 1333 goto err_sriov; 1334 } 1335 1336 mlx5_sf_dev_table_create(dev); 1337 1338 err = mlx5_devlink_traps_register(priv_to_devlink(dev)); 1339 if (err) 1340 goto err_traps_reg; 1341 1342 return 0; 1343 1344 err_traps_reg: 1345 mlx5_sf_dev_table_destroy(dev); 1346 mlx5_sriov_detach(dev); 1347 err_sriov: 1348 mlx5_lag_remove_mdev(dev); 1349 mlx5_ec_cleanup(dev); 1350 err_ec: 1351 mlx5_vhca_event_stop(dev); 1352 err_set_hca: 1353 mlx5_fs_core_cleanup(dev); 1354 err_fs: 1355 mlx5_fpga_device_stop(dev); 1356 err_fpga_start: 1357 mlx5_rsc_dump_cleanup(dev); 1358 mlx5_hv_vhca_cleanup(dev->hv_vhca); 1359 mlx5_fw_reset_events_stop(dev); 1360 mlx5_fw_tracer_cleanup(dev->tracer); 1361 mlx5_clock_unload(dev); 1362 mlx5_eq_table_destroy(dev); 1363 err_eq_table: 1364 mlx5_irq_table_destroy(dev); 1365 err_irq_table: 1366 mlx5_pagealloc_stop(dev); 1367 mlx5_events_stop(dev); 1368 mlx5_free_bfreg(dev, &dev->priv.bfreg); 1369 return err; 1370 } 1371 1372 static void mlx5_unload(struct mlx5_core_dev *dev) 1373 { 1374 mlx5_devlink_traps_unregister(priv_to_devlink(dev)); 1375 mlx5_vhca_event_stop(dev); 1376 mlx5_sf_dev_table_destroy(dev); 1377 mlx5_sriov_detach(dev); 1378 mlx5_lag_remove_mdev(dev); 1379 mlx5_ec_cleanup(dev); 1380 mlx5_sf_hw_table_destroy(dev); 1381 mlx5_fs_core_cleanup(dev); 1382 mlx5_fpga_device_stop(dev); 1383 mlx5_rsc_dump_cleanup(dev); 1384 mlx5_hv_vhca_cleanup(dev->hv_vhca); 1385 mlx5_fw_reset_events_stop(dev); 1386 mlx5_fw_tracer_cleanup(dev->tracer); 1387 mlx5_clock_unload(dev); 1388 mlx5_eq_table_destroy(dev); 1389 mlx5_irq_table_destroy(dev); 1390 mlx5_pagealloc_stop(dev); 1391 mlx5_events_stop(dev); 1392 mlx5_free_bfreg(dev, &dev->priv.bfreg); 1393 } 1394 1395 int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) 1396 { 1397 bool light_probe = mlx5_dev_is_lightweight(dev); 1398 int err = 0; 1399 1400 mutex_lock(&dev->intf_state_mutex); 1401 dev->state = MLX5_DEVICE_STATE_UP; 1402 1403 err = mlx5_function_setup(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); 1404 if (err) 1405 goto err_function; 1406 1407 err = mlx5_init_once(dev); 1408 if (err) { 1409 mlx5_core_err(dev, "sw objs init failed\n"); 1410 goto function_teardown; 1411 } 1412 1413 /* In case of light_probe, mlx5_devlink is already registered. 1414 * Hence, don't register devlink again. 1415 */ 1416 if (!light_probe) { 1417 err = mlx5_devlink_params_register(priv_to_devlink(dev)); 1418 if (err) 1419 goto err_devlink_params_reg; 1420 } 1421 1422 err = mlx5_load(dev); 1423 if (err) 1424 goto err_load; 1425 1426 set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); 1427 1428 err = mlx5_register_device(dev); 1429 if (err) 1430 goto err_register; 1431 1432 err = mlx5_crdump_enable(dev); 1433 if (err) 1434 mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err); 1435 1436 err = mlx5_hwmon_dev_register(dev); 1437 if (err) 1438 mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err); 1439 1440 mutex_unlock(&dev->intf_state_mutex); 1441 return 0; 1442 1443 err_register: 1444 clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); 1445 mlx5_unload(dev); 1446 err_load: 1447 if (!light_probe) 1448 mlx5_devlink_params_unregister(priv_to_devlink(dev)); 1449 err_devlink_params_reg: 1450 mlx5_cleanup_once(dev); 1451 function_teardown: 1452 mlx5_function_teardown(dev, true); 1453 err_function: 1454 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 1455 mutex_unlock(&dev->intf_state_mutex); 1456 return err; 1457 } 1458 1459 int mlx5_init_one(struct mlx5_core_dev *dev) 1460 { 1461 struct devlink *devlink = priv_to_devlink(dev); 1462 int err; 1463 1464 devl_lock(devlink); 1465 if (dev->shd) { 1466 err = devl_nested_devlink_set(dev->shd, devlink); 1467 if (err) 1468 goto unlock; 1469 } 1470 devl_register(devlink); 1471 err = mlx5_init_one_devl_locked(dev); 1472 if (err) 1473 devl_unregister(devlink); 1474 unlock: 1475 devl_unlock(devlink); 1476 return err; 1477 } 1478 1479 void mlx5_uninit_one(struct mlx5_core_dev *dev) 1480 { 1481 struct devlink *devlink = priv_to_devlink(dev); 1482 1483 devl_lock(devlink); 1484 mutex_lock(&dev->intf_state_mutex); 1485 1486 mlx5_hwmon_dev_unregister(dev); 1487 mlx5_crdump_disable(dev); 1488 mlx5_eswitch_disable(dev->priv.eswitch); 1489 mlx5_unregister_device(dev); 1490 1491 if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { 1492 mlx5_core_warn(dev, "%s: interface is down, NOP\n", 1493 __func__); 1494 mlx5_devlink_params_unregister(priv_to_devlink(dev)); 1495 mlx5_cleanup_once(dev); 1496 goto out; 1497 } 1498 1499 clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); 1500 mlx5_unload(dev); 1501 mlx5_devlink_params_unregister(priv_to_devlink(dev)); 1502 mlx5_cleanup_once(dev); 1503 mlx5_function_teardown(dev, true); 1504 out: 1505 mutex_unlock(&dev->intf_state_mutex); 1506 devl_unregister(devlink); 1507 devl_unlock(devlink); 1508 } 1509 1510 int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery) 1511 { 1512 int err = 0; 1513 u64 timeout; 1514 1515 devl_assert_locked(priv_to_devlink(dev)); 1516 mutex_lock(&dev->intf_state_mutex); 1517 if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { 1518 mlx5_core_warn(dev, "interface is up, NOP\n"); 1519 goto out; 1520 } 1521 /* remove any previous indication of internal error */ 1522 dev->state = MLX5_DEVICE_STATE_UP; 1523 1524 if (recovery) 1525 timeout = mlx5_tout_ms(dev, FW_PRE_INIT_ON_RECOVERY_TIMEOUT); 1526 else 1527 timeout = mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT); 1528 err = mlx5_function_setup(dev, false, timeout); 1529 if (err) 1530 goto err_function; 1531 1532 err = mlx5_load(dev); 1533 if (err) 1534 goto err_load; 1535 1536 set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); 1537 1538 err = mlx5_attach_device(dev); 1539 if (err) 1540 goto err_attach; 1541 1542 mutex_unlock(&dev->intf_state_mutex); 1543 return 0; 1544 1545 err_attach: 1546 clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); 1547 mlx5_unload(dev); 1548 err_load: 1549 mlx5_function_teardown(dev, false); 1550 err_function: 1551 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 1552 out: 1553 mutex_unlock(&dev->intf_state_mutex); 1554 return err; 1555 } 1556 1557 int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) 1558 { 1559 struct devlink *devlink = priv_to_devlink(dev); 1560 int ret; 1561 1562 devl_lock(devlink); 1563 ret = mlx5_load_one_devl_locked(dev, recovery); 1564 devl_unlock(devlink); 1565 return ret; 1566 } 1567 1568 void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend) 1569 { 1570 devl_assert_locked(priv_to_devlink(dev)); 1571 mutex_lock(&dev->intf_state_mutex); 1572 1573 mlx5_eswitch_disable(dev->priv.eswitch); 1574 mlx5_detach_device(dev, suspend); 1575 1576 if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { 1577 mlx5_core_warn(dev, "%s: interface is down, NOP\n", 1578 __func__); 1579 goto out; 1580 } 1581 1582 clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); 1583 mlx5_unload(dev); 1584 mlx5_function_teardown(dev, false); 1585 out: 1586 mutex_unlock(&dev->intf_state_mutex); 1587 } 1588 1589 void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) 1590 { 1591 struct devlink *devlink = priv_to_devlink(dev); 1592 1593 devl_lock(devlink); 1594 mlx5_unload_one_devl_locked(dev, suspend); 1595 devl_unlock(devlink); 1596 } 1597 1598 /* In case of light probe, we don't need a full query of hca_caps, but only the bellow caps. 1599 * A full query of hca_caps will be done when the device will reload. 1600 */ 1601 static int mlx5_query_hca_caps_light(struct mlx5_core_dev *dev) 1602 { 1603 int err; 1604 1605 err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL); 1606 if (err) 1607 return err; 1608 1609 if (MLX5_CAP_GEN(dev, eth_net_offloads)) { 1610 err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ETHERNET_OFFLOADS, 1611 HCA_CAP_OPMOD_GET_CUR); 1612 if (err) 1613 return err; 1614 } 1615 1616 if (MLX5_CAP_GEN(dev, nic_flow_table) || 1617 MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { 1618 err = mlx5_core_get_caps_mode(dev, MLX5_CAP_FLOW_TABLE, 1619 HCA_CAP_OPMOD_GET_CUR); 1620 if (err) 1621 return err; 1622 } 1623 1624 if (MLX5_CAP_GEN_64(dev, general_obj_types) & 1625 MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { 1626 err = mlx5_core_get_caps_mode(dev, MLX5_CAP_VDPA_EMULATION, 1627 HCA_CAP_OPMOD_GET_CUR); 1628 if (err) 1629 return err; 1630 } 1631 1632 return 0; 1633 } 1634 1635 int mlx5_init_one_light(struct mlx5_core_dev *dev) 1636 { 1637 struct devlink *devlink = priv_to_devlink(dev); 1638 int err; 1639 1640 devl_lock(devlink); 1641 devl_register(devlink); 1642 dev->state = MLX5_DEVICE_STATE_UP; 1643 err = mlx5_function_enable(dev, true, mlx5_tout_ms(dev, FW_PRE_INIT_TIMEOUT)); 1644 if (err) { 1645 mlx5_core_warn(dev, "mlx5_function_enable err=%d\n", err); 1646 goto out; 1647 } 1648 1649 err = mlx5_query_hca_caps_light(dev); 1650 if (err) { 1651 mlx5_core_warn(dev, "mlx5_query_hca_caps_light err=%d\n", err); 1652 goto query_hca_caps_err; 1653 } 1654 1655 err = mlx5_devlink_params_register(priv_to_devlink(dev)); 1656 if (err) { 1657 mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err); 1658 goto query_hca_caps_err; 1659 } 1660 1661 devl_unlock(devlink); 1662 return 0; 1663 1664 query_hca_caps_err: 1665 mlx5_function_disable(dev, true); 1666 out: 1667 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 1668 devl_unregister(devlink); 1669 devl_unlock(devlink); 1670 return err; 1671 } 1672 1673 void mlx5_uninit_one_light(struct mlx5_core_dev *dev) 1674 { 1675 struct devlink *devlink = priv_to_devlink(dev); 1676 1677 devl_lock(devlink); 1678 mlx5_devlink_params_unregister(priv_to_devlink(dev)); 1679 devl_unregister(devlink); 1680 devl_unlock(devlink); 1681 if (dev->state != MLX5_DEVICE_STATE_UP) 1682 return; 1683 mlx5_function_disable(dev, true); 1684 } 1685 1686 /* xxx_light() function are used in order to configure the device without full 1687 * init (light init). e.g.: There isn't a point in reload a device to light state. 1688 * Hence, mlx5_load_one_light() isn't needed. 1689 */ 1690 1691 void mlx5_unload_one_light(struct mlx5_core_dev *dev) 1692 { 1693 if (dev->state != MLX5_DEVICE_STATE_UP) 1694 return; 1695 mlx5_function_disable(dev, false); 1696 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; 1697 } 1698 1699 static const int types[] = { 1700 MLX5_CAP_GENERAL, 1701 MLX5_CAP_GENERAL_2, 1702 MLX5_CAP_ETHERNET_OFFLOADS, 1703 MLX5_CAP_IPOIB_ENHANCED_OFFLOADS, 1704 MLX5_CAP_ODP, 1705 MLX5_CAP_ATOMIC, 1706 MLX5_CAP_ROCE, 1707 MLX5_CAP_IPOIB_OFFLOADS, 1708 MLX5_CAP_FLOW_TABLE, 1709 MLX5_CAP_ESWITCH_FLOW_TABLE, 1710 MLX5_CAP_ESWITCH, 1711 MLX5_CAP_QOS, 1712 MLX5_CAP_DEBUG, 1713 MLX5_CAP_DEV_MEM, 1714 MLX5_CAP_DEV_EVENT, 1715 MLX5_CAP_TLS, 1716 MLX5_CAP_VDPA_EMULATION, 1717 MLX5_CAP_IPSEC, 1718 MLX5_CAP_PORT_SELECTION, 1719 MLX5_CAP_PSP, 1720 MLX5_CAP_MACSEC, 1721 MLX5_CAP_ADV_VIRTUALIZATION, 1722 MLX5_CAP_CRYPTO, 1723 MLX5_CAP_SHAMPO, 1724 MLX5_CAP_ADV_RDMA, 1725 MLX5_CAP_TLP_EMULATION, 1726 }; 1727 1728 static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) 1729 { 1730 int type; 1731 int i; 1732 1733 for (i = 0; i < ARRAY_SIZE(types); i++) { 1734 type = types[i]; 1735 kfree(dev->caps.hca[type]); 1736 } 1737 } 1738 1739 static int mlx5_hca_caps_alloc(struct mlx5_core_dev *dev) 1740 { 1741 struct mlx5_hca_cap *cap; 1742 int type; 1743 int i; 1744 1745 for (i = 0; i < ARRAY_SIZE(types); i++) { 1746 cap = kzalloc_obj(*cap); 1747 if (!cap) 1748 goto err; 1749 type = types[i]; 1750 dev->caps.hca[type] = cap; 1751 } 1752 1753 return 0; 1754 1755 err: 1756 mlx5_hca_caps_free(dev); 1757 return -ENOMEM; 1758 } 1759 1760 static int mlx5_notifiers_init(struct mlx5_core_dev *dev) 1761 { 1762 int err; 1763 1764 err = mlx5_events_init(dev); 1765 if (err) { 1766 mlx5_core_err(dev, "failed to initialize events\n"); 1767 return err; 1768 } 1769 1770 BLOCKING_INIT_NOTIFIER_HEAD(&dev->priv.esw_n_head); 1771 mlx5_vhca_state_notifier_init(dev); 1772 1773 err = mlx5_sf_hw_notifier_init(dev); 1774 if (err) 1775 goto err_sf_hw_notifier; 1776 1777 err = mlx5_sf_notifiers_init(dev); 1778 if (err) 1779 goto err_sf_notifiers; 1780 1781 err = mlx5_sf_dev_notifier_init(dev); 1782 if (err) 1783 goto err_sf_dev_notifier; 1784 1785 return 0; 1786 1787 err_sf_dev_notifier: 1788 mlx5_sf_notifiers_cleanup(dev); 1789 err_sf_notifiers: 1790 mlx5_sf_hw_notifier_cleanup(dev); 1791 err_sf_hw_notifier: 1792 mlx5_events_cleanup(dev); 1793 return err; 1794 } 1795 1796 static void mlx5_notifiers_cleanup(struct mlx5_core_dev *dev) 1797 { 1798 mlx5_sf_dev_notifier_cleanup(dev); 1799 mlx5_sf_notifiers_cleanup(dev); 1800 mlx5_sf_hw_notifier_cleanup(dev); 1801 mlx5_events_cleanup(dev); 1802 } 1803 1804 int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) 1805 { 1806 struct mlx5_priv *priv = &dev->priv; 1807 int err; 1808 1809 memcpy(&dev->profile, &profile[profile_idx], sizeof(dev->profile)); 1810 lockdep_register_key(&dev->lock_key); 1811 mutex_init(&dev->intf_state_mutex); 1812 lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); 1813 mutex_init(&dev->mlx5e_res.uplink_netdev_lock); 1814 mutex_init(&dev->wc_state_lock); 1815 1816 mutex_init(&priv->bfregs.reg_head.lock); 1817 mutex_init(&priv->bfregs.wc_head.lock); 1818 INIT_LIST_HEAD(&priv->bfregs.reg_head.list); 1819 INIT_LIST_HEAD(&priv->bfregs.wc_head.list); 1820 1821 mutex_init(&priv->alloc_mutex); 1822 mutex_init(&priv->pgdir_mutex); 1823 INIT_LIST_HEAD(&priv->pgdir_list); 1824 1825 priv->numa_node = dev_to_node(mlx5_core_dma_dev(dev)); 1826 priv->dbg.dbg_root = debugfs_create_dir(dev_name(dev->device), 1827 mlx5_debugfs_root); 1828 1829 err = mlx5_frag_buf_pools_init(dev); 1830 if (err) 1831 goto err_frag_buf_pools_init; 1832 1833 INIT_LIST_HEAD(&priv->traps); 1834 1835 err = mlx5_cmd_init(dev); 1836 if (err) { 1837 mlx5_core_err(dev, "Failed initializing cmdif SW structs, aborting\n"); 1838 goto err_cmd_init; 1839 } 1840 1841 err = mlx5_tout_init(dev); 1842 if (err) { 1843 mlx5_core_err(dev, "Failed initializing timeouts, aborting\n"); 1844 goto err_timeout_init; 1845 } 1846 1847 err = mlx5_health_init(dev); 1848 if (err) 1849 goto err_health_init; 1850 1851 err = mlx5_pagealloc_init(dev); 1852 if (err) 1853 goto err_pagealloc_init; 1854 1855 err = mlx5_adev_init(dev); 1856 if (err) 1857 goto err_adev_init; 1858 1859 err = mlx5_hca_caps_alloc(dev); 1860 if (err) 1861 goto err_hca_caps; 1862 1863 err = mlx5_notifiers_init(dev); 1864 if (err) 1865 goto err_notifiers_init; 1866 1867 /* The conjunction of sw_vhca_id with sw_owner_id will be a global 1868 * unique id per function which uses mlx5_core. 1869 * Those values are supplied to FW as part of the init HCA command to 1870 * be used by both driver and FW when it's applicable. 1871 */ 1872 dev->priv.sw_vhca_id = ida_alloc_range(&sw_vhca_ida, 1, 1873 MAX_SW_VHCA_ID, 1874 GFP_KERNEL); 1875 if (dev->priv.sw_vhca_id < 0) 1876 mlx5_core_err(dev, "failed to allocate sw_vhca_id, err=%d\n", 1877 dev->priv.sw_vhca_id); 1878 1879 return 0; 1880 1881 err_notifiers_init: 1882 mlx5_hca_caps_free(dev); 1883 err_hca_caps: 1884 mlx5_adev_cleanup(dev); 1885 err_adev_init: 1886 mlx5_pagealloc_cleanup(dev); 1887 err_pagealloc_init: 1888 mlx5_health_cleanup(dev); 1889 err_health_init: 1890 mlx5_tout_cleanup(dev); 1891 err_timeout_init: 1892 mlx5_cmd_cleanup(dev); 1893 err_cmd_init: 1894 mlx5_frag_buf_pools_cleanup(dev); 1895 err_frag_buf_pools_init: 1896 debugfs_remove(dev->priv.dbg.dbg_root); 1897 mutex_destroy(&priv->pgdir_mutex); 1898 mutex_destroy(&priv->alloc_mutex); 1899 mutex_destroy(&priv->bfregs.wc_head.lock); 1900 mutex_destroy(&priv->bfregs.reg_head.lock); 1901 mutex_destroy(&dev->intf_state_mutex); 1902 lockdep_unregister_key(&dev->lock_key); 1903 return err; 1904 } 1905 1906 void mlx5_mdev_uninit(struct mlx5_core_dev *dev) 1907 { 1908 struct mlx5_priv *priv = &dev->priv; 1909 1910 if (priv->sw_vhca_id > 0) 1911 ida_free(&sw_vhca_ida, dev->priv.sw_vhca_id); 1912 1913 mlx5_notifiers_cleanup(dev); 1914 mlx5_hca_caps_free(dev); 1915 mlx5_adev_cleanup(dev); 1916 mlx5_pagealloc_cleanup(dev); 1917 mlx5_health_cleanup(dev); 1918 mlx5_tout_cleanup(dev); 1919 mlx5_cmd_cleanup(dev); 1920 mlx5_frag_buf_pools_cleanup(dev); 1921 debugfs_remove_recursive(dev->priv.dbg.dbg_root); 1922 mutex_destroy(&priv->pgdir_mutex); 1923 mutex_destroy(&priv->alloc_mutex); 1924 mutex_destroy(&priv->bfregs.wc_head.lock); 1925 mutex_destroy(&priv->bfregs.reg_head.lock); 1926 mutex_destroy(&dev->wc_state_lock); 1927 mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); 1928 mutex_destroy(&dev->intf_state_mutex); 1929 lockdep_unregister_key(&dev->lock_key); 1930 } 1931 1932 static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) 1933 { 1934 struct mlx5_core_dev *dev; 1935 struct devlink *devlink; 1936 int err; 1937 1938 devlink = mlx5_devlink_alloc(&pdev->dev); 1939 if (!devlink) { 1940 dev_err(&pdev->dev, "devlink alloc failed\n"); 1941 return -ENOMEM; 1942 } 1943 1944 dev = devlink_priv(devlink); 1945 dev->device = &pdev->dev; 1946 dev->pdev = pdev; 1947 1948 dev->coredev_type = id->driver_data & MLX5_PCI_DEV_IS_VF ? 1949 MLX5_COREDEV_VF : MLX5_COREDEV_PF; 1950 1951 dev->priv.adev_idx = mlx5_adev_idx_alloc(); 1952 if (dev->priv.adev_idx < 0) { 1953 err = dev->priv.adev_idx; 1954 goto adev_init_err; 1955 } 1956 1957 err = mlx5_mdev_init(dev, prof_sel); 1958 if (err) 1959 goto mdev_init_err; 1960 1961 err = mlx5_pci_init(dev, pdev, id); 1962 if (err) { 1963 mlx5_core_err(dev, "mlx5_pci_init failed with error code %d\n", 1964 err); 1965 goto pci_init_err; 1966 } 1967 1968 err = mlx5_shd_init(dev); 1969 if (err) { 1970 mlx5_core_err(dev, "mlx5_shd_init failed with error code %d\n", 1971 err); 1972 goto shd_init_err; 1973 } 1974 1975 err = mlx5_init_one(dev); 1976 if (err) { 1977 mlx5_core_err(dev, "mlx5_init_one failed with error code %d\n", 1978 err); 1979 goto err_init_one; 1980 } 1981 1982 mlx5_vhca_debugfs_init(dev); 1983 1984 pci_save_state(pdev); 1985 return 0; 1986 1987 err_init_one: 1988 mlx5_shd_uninit(dev); 1989 shd_init_err: 1990 mlx5_pci_close(dev); 1991 pci_init_err: 1992 mlx5_mdev_uninit(dev); 1993 mdev_init_err: 1994 mlx5_adev_idx_free(dev->priv.adev_idx); 1995 adev_init_err: 1996 mlx5_devlink_free(devlink); 1997 1998 return err; 1999 } 2000 2001 static void remove_one(struct pci_dev *pdev) 2002 { 2003 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2004 struct devlink *devlink = priv_to_devlink(dev); 2005 2006 set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); 2007 mlx5_drain_fw_reset(dev); 2008 mlx5_drain_health_wq(dev); 2009 mlx5_sriov_disable(pdev, false); 2010 mlx5_uninit_one(dev); 2011 mlx5_shd_uninit(dev); 2012 mlx5_pci_close(dev); 2013 mlx5_mdev_uninit(dev); 2014 mlx5_adev_idx_free(dev->priv.adev_idx); 2015 mlx5_devlink_free(devlink); 2016 } 2017 2018 #define mlx5_pci_trace(dev, fmt, ...) ({ \ 2019 struct mlx5_core_dev *__dev = (dev); \ 2020 mlx5_core_info(__dev, "%s Device state = %d health sensors: %d pci_status: %d. " fmt, \ 2021 __func__, __dev->state, mlx5_health_check_fatal_sensors(__dev), \ 2022 __dev->pci_status, ##__VA_ARGS__); \ 2023 }) 2024 2025 static const char *result2str(enum pci_ers_result result) 2026 { 2027 return result == PCI_ERS_RESULT_NEED_RESET ? "need reset" : 2028 result == PCI_ERS_RESULT_DISCONNECT ? "disconnect" : 2029 result == PCI_ERS_RESULT_RECOVERED ? "recovered" : 2030 "unknown"; 2031 } 2032 2033 static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, 2034 pci_channel_state_t state) 2035 { 2036 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2037 enum pci_ers_result res; 2038 2039 mlx5_pci_trace(dev, "Enter, pci channel state = %d\n", state); 2040 2041 mlx5_enter_error_state(dev, false); 2042 mlx5_error_sw_reset(dev); 2043 mlx5_unload_one(dev, false); 2044 mlx5_drain_health_wq(dev); 2045 mlx5_pci_disable_device(dev); 2046 2047 res = state == pci_channel_io_perm_failure ? 2048 PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; 2049 2050 mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n", 2051 __func__, dev->state, dev->pci_status, res, result2str(res)); 2052 return res; 2053 } 2054 2055 /* wait for the device to show vital signs by waiting 2056 * for the health counter to start counting. 2057 */ 2058 static int wait_vital(struct pci_dev *pdev) 2059 { 2060 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2061 struct mlx5_core_health *health = &dev->priv.health; 2062 const int niter = 100; 2063 u32 last_count = 0; 2064 u32 count; 2065 int i; 2066 2067 for (i = 0; i < niter; i++) { 2068 count = ioread32be(health->health_counter); 2069 if (count && count != 0xffffffff) { 2070 if (last_count && last_count != count) { 2071 mlx5_core_info(dev, 2072 "wait vital counter value 0x%x after %d iterations\n", 2073 count, i); 2074 return 0; 2075 } 2076 last_count = count; 2077 } 2078 msleep(50); 2079 } 2080 2081 return -ETIMEDOUT; 2082 } 2083 2084 static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev) 2085 { 2086 enum pci_ers_result res = PCI_ERS_RESULT_DISCONNECT; 2087 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2088 int err; 2089 2090 mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n", 2091 __func__, dev->state, dev->pci_status); 2092 2093 err = mlx5_pci_enable_device(dev); 2094 if (err) { 2095 mlx5_core_err(dev, "%s: mlx5_pci_enable_device failed with error code: %d\n", 2096 __func__, err); 2097 goto out; 2098 } 2099 2100 pci_set_master(pdev); 2101 pci_restore_state(pdev); 2102 2103 err = wait_vital(pdev); 2104 if (err) { 2105 mlx5_core_err(dev, "%s: wait vital failed with error code: %d\n", 2106 __func__, err); 2107 goto out; 2108 } 2109 2110 res = PCI_ERS_RESULT_RECOVERED; 2111 out: 2112 mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n", 2113 __func__, dev->state, dev->pci_status, err, res, result2str(res)); 2114 return res; 2115 } 2116 2117 static void mlx5_pci_resume(struct pci_dev *pdev) 2118 { 2119 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2120 int err; 2121 2122 mlx5_pci_trace(dev, "Enter, loading driver..\n"); 2123 2124 err = mlx5_load_one(dev, false); 2125 2126 if (!err) 2127 devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, 2128 DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); 2129 2130 mlx5_pci_trace(dev, "Done, err = %d, device %s\n", err, 2131 !err ? "recovered" : "Failed"); 2132 } 2133 2134 static const struct pci_error_handlers mlx5_err_handler = { 2135 .error_detected = mlx5_pci_err_detected, 2136 .slot_reset = mlx5_pci_slot_reset, 2137 .resume = mlx5_pci_resume 2138 }; 2139 2140 static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) 2141 { 2142 bool fast_teardown = false, force_teardown = false; 2143 int ret = 1; 2144 2145 fast_teardown = MLX5_CAP_GEN(dev, fast_teardown); 2146 force_teardown = MLX5_CAP_GEN(dev, force_teardown); 2147 2148 mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown); 2149 mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown); 2150 2151 if (!fast_teardown && !force_teardown) 2152 return -EOPNOTSUPP; 2153 2154 if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { 2155 mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); 2156 return -EAGAIN; 2157 } 2158 2159 /* Panic tear down fw command will stop the PCI bus communication 2160 * with the HCA, so the health poll is no longer needed. 2161 */ 2162 mlx5_stop_health_poll(dev, false); 2163 2164 ret = mlx5_cmd_fast_teardown_hca(dev); 2165 if (!ret) 2166 goto succeed; 2167 2168 ret = mlx5_cmd_force_teardown_hca(dev); 2169 if (!ret) 2170 goto succeed; 2171 2172 mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); 2173 mlx5_start_health_poll(dev); 2174 return ret; 2175 2176 succeed: 2177 mlx5_enter_error_state(dev, true); 2178 2179 /* Some platforms requiring freeing the IRQ's in the shutdown 2180 * flow. If they aren't freed they can't be allocated after 2181 * kexec. There is no need to cleanup the mlx5_core software 2182 * contexts. 2183 */ 2184 mlx5_core_eq_free_irqs(dev); 2185 2186 return 0; 2187 } 2188 2189 static void shutdown(struct pci_dev *pdev) 2190 { 2191 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2192 int err; 2193 2194 mlx5_core_info(dev, "Shutdown was called\n"); 2195 set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); 2196 mlx5_drain_fw_reset(dev); 2197 mlx5_drain_health_wq(dev); 2198 err = mlx5_try_fast_unload(dev); 2199 if (err) 2200 mlx5_unload_one(dev, false); 2201 mlx5_pci_disable_device(dev); 2202 } 2203 2204 static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state) 2205 { 2206 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2207 2208 mlx5_unload_one(dev, true); 2209 2210 return 0; 2211 } 2212 2213 static int mlx5_resume(struct pci_dev *pdev) 2214 { 2215 struct mlx5_core_dev *dev = pci_get_drvdata(pdev); 2216 2217 return mlx5_load_one(dev, false); 2218 } 2219 2220 static const struct pci_device_id mlx5_core_pci_table[] = { 2221 { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTIB) }, 2222 { PCI_VDEVICE(MELLANOX, 0x1012), 2223 .driver_data = MLX5_PCI_DEV_IS_VF }, /* Connect-IB VF */ 2224 { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4) }, 2225 { PCI_VDEVICE(MELLANOX, 0x1014), 2226 .driver_data = MLX5_PCI_DEV_IS_VF }, /* ConnectX-4 VF */ 2227 { PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_CONNECTX4_LX) }, 2228 { PCI_VDEVICE(MELLANOX, 0x1016), 2229 .driver_data = MLX5_PCI_DEV_IS_VF }, /* ConnectX-4LX VF */ 2230 { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ 2231 { PCI_VDEVICE(MELLANOX, 0x1018), 2232 .driver_data = MLX5_PCI_DEV_IS_VF }, /* ConnectX-5 VF */ 2233 { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */ 2234 { PCI_VDEVICE(MELLANOX, 0x101a), 2235 .driver_data = MLX5_PCI_DEV_IS_VF }, /* ConnectX-5 Ex VF */ 2236 { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ 2237 { PCI_VDEVICE(MELLANOX, 0x101c), 2238 .driver_data = MLX5_PCI_DEV_IS_VF }, /* ConnectX-6 VF */ 2239 { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ 2240 { PCI_VDEVICE(MELLANOX, 0x101e), 2241 .driver_data = MLX5_PCI_DEV_IS_VF }, /* ConnectX Family mlx5Gen Virtual Function */ 2242 { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ 2243 { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ 2244 { PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */ 2245 { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */ 2246 { PCI_VDEVICE(MELLANOX, 0x1027) }, /* ConnectX-10 */ 2247 { PCI_VDEVICE(MELLANOX, 0x2101) }, /* ConnectX-10 NVLink-C2C */ 2248 { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ 2249 { PCI_VDEVICE(MELLANOX, 0xa2d3), 2250 .driver_data = MLX5_PCI_DEV_IS_VF }, /* BlueField integrated ConnectX-5 network controller VF */ 2251 { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ 2252 { PCI_VDEVICE(MELLANOX, 0xa2dc) }, /* BlueField-3 integrated ConnectX-7 network controller */ 2253 { PCI_VDEVICE(MELLANOX, 0xa2df) }, /* BlueField-4 integrated ConnectX-8 network controller */ 2254 { } 2255 }; 2256 2257 MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); 2258 2259 void mlx5_disable_device(struct mlx5_core_dev *dev) 2260 { 2261 mlx5_error_sw_reset(dev); 2262 mlx5_unload_one_devl_locked(dev, false); 2263 } 2264 2265 int mlx5_recover_device(struct mlx5_core_dev *dev) 2266 { 2267 if (!mlx5_core_is_sf(dev)) { 2268 mlx5_pci_disable_device(dev); 2269 if (mlx5_pci_slot_reset(dev->pdev) != PCI_ERS_RESULT_RECOVERED) 2270 return -EIO; 2271 } 2272 2273 return mlx5_load_one_devl_locked(dev, true); 2274 } 2275 2276 static struct pci_driver mlx5_core_driver = { 2277 .name = KBUILD_MODNAME, 2278 .id_table = mlx5_core_pci_table, 2279 .probe = probe_one, 2280 .remove = remove_one, 2281 .suspend = mlx5_suspend, 2282 .resume = mlx5_resume, 2283 .shutdown = shutdown, 2284 .err_handler = &mlx5_err_handler, 2285 .sriov_configure = mlx5_core_sriov_configure, 2286 .sriov_get_vf_total_msix = mlx5_sriov_get_vf_total_msix, 2287 .sriov_set_msix_vec_count = mlx5_core_sriov_set_msix_vec_count, 2288 }; 2289 2290 /** 2291 * mlx5_vf_get_core_dev - Get the mlx5 core device from a given VF PCI device if 2292 * mlx5_core is its driver. 2293 * @pdev: The associated PCI device. 2294 * 2295 * Upon return the interface state lock stay held to let caller uses it safely. 2296 * Caller must ensure to use the returned mlx5 device for a narrow window 2297 * and put it back with mlx5_vf_put_core_dev() immediately once usage was over. 2298 * 2299 * Return: Pointer to the associated mlx5_core_dev or NULL. 2300 */ 2301 struct mlx5_core_dev *mlx5_vf_get_core_dev(struct pci_dev *pdev) 2302 { 2303 struct mlx5_core_dev *mdev; 2304 2305 mdev = pci_iov_get_pf_drvdata(pdev, &mlx5_core_driver); 2306 if (IS_ERR(mdev)) 2307 return NULL; 2308 2309 mutex_lock(&mdev->intf_state_mutex); 2310 if (!test_bit(MLX5_INTERFACE_STATE_UP, &mdev->intf_state)) { 2311 mutex_unlock(&mdev->intf_state_mutex); 2312 return NULL; 2313 } 2314 2315 return mdev; 2316 } 2317 EXPORT_SYMBOL(mlx5_vf_get_core_dev); 2318 2319 /** 2320 * mlx5_vf_put_core_dev - Put the mlx5 core device back. 2321 * @mdev: The mlx5 core device. 2322 * 2323 * Upon return the interface state lock is unlocked and caller should not 2324 * access the mdev any more. 2325 */ 2326 void mlx5_vf_put_core_dev(struct mlx5_core_dev *mdev) 2327 { 2328 mutex_unlock(&mdev->intf_state_mutex); 2329 } 2330 EXPORT_SYMBOL(mlx5_vf_put_core_dev); 2331 2332 static void mlx5_core_verify_params(void) 2333 { 2334 if (prof_sel >= ARRAY_SIZE(profile)) { 2335 pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %d, valid range 0-%zu, changing back to default(%d)\n", 2336 prof_sel, 2337 ARRAY_SIZE(profile) - 1, 2338 MLX5_DEFAULT_PROF); 2339 prof_sel = MLX5_DEFAULT_PROF; 2340 } 2341 } 2342 2343 static int __init mlx5_init(void) 2344 { 2345 int err; 2346 2347 WARN_ONCE(strcmp(MLX5_ADEV_NAME, KBUILD_MODNAME), 2348 "mlx5_core name not in sync with kernel module name"); 2349 2350 get_random_bytes(&sw_owner_id, sizeof(sw_owner_id)); 2351 2352 mlx5_core_verify_params(); 2353 mlx5_register_debugfs(); 2354 2355 err = mlx5e_init(); 2356 if (err) 2357 goto err_debug; 2358 2359 err = mlx5_sf_driver_register(); 2360 if (err) 2361 goto err_sf; 2362 2363 err = pci_register_driver(&mlx5_core_driver); 2364 if (err) 2365 goto err_pci; 2366 2367 return 0; 2368 2369 err_pci: 2370 mlx5_sf_driver_unregister(); 2371 err_sf: 2372 mlx5e_cleanup(); 2373 err_debug: 2374 mlx5_unregister_debugfs(); 2375 return err; 2376 } 2377 2378 static void __exit mlx5_cleanup(void) 2379 { 2380 pci_unregister_driver(&mlx5_core_driver); 2381 mlx5_sf_driver_unregister(); 2382 mlx5e_cleanup(); 2383 mlx5_unregister_debugfs(); 2384 } 2385 2386 module_init(mlx5_init); 2387 module_exit(mlx5_cleanup); 2388