1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB 2 /* 3 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved. 4 * Copyright (c) 2020, Intel Corporation. All rights reserved. 5 */ 6 7 #include <linux/debugfs.h> 8 #include <linux/highmem.h> 9 #include <linux/module.h> 10 #include <linux/init.h> 11 #include <linux/errno.h> 12 #include <linux/pci.h> 13 #include <linux/dma-mapping.h> 14 #include <linux/slab.h> 15 #include <linux/bitmap.h> 16 #include <linux/sched.h> 17 #include <linux/sched/mm.h> 18 #include <linux/sched/task.h> 19 #include <linux/delay.h> 20 #include <rdma/ib_user_verbs.h> 21 #include <rdma/ib_addr.h> 22 #include <rdma/ib_cache.h> 23 #include <linux/mlx5/port.h> 24 #include <linux/mlx5/vport.h> 25 #include <linux/mlx5/fs.h> 26 #include <linux/mlx5/eswitch.h> 27 #include <linux/mlx5/driver.h> 28 #include <linux/list.h> 29 #include <rdma/ib_smi.h> 30 #include <rdma/ib_umem_odp.h> 31 #include <rdma/lag.h> 32 #include <linux/in.h> 33 #include <linux/etherdevice.h> 34 #include "mlx5_ib.h" 35 #include "ib_rep.h" 36 #include "cmd.h" 37 #include "devx.h" 38 #include "dm.h" 39 #include "fs.h" 40 #include "srq.h" 41 #include "qp.h" 42 #include "wr.h" 43 #include "restrack.h" 44 #include "counters.h" 45 #include "umr.h" 46 #include <rdma/uverbs_std_types.h> 47 #include <rdma/uverbs_ioctl.h> 48 #include <rdma/mlx5_user_ioctl_verbs.h> 49 #include <rdma/mlx5_user_ioctl_cmds.h> 50 #include "macsec.h" 51 #include "data_direct.h" 52 53 #define UVERBS_MODULE_NAME mlx5_ib 54 #include <rdma/uverbs_named_ioctl.h> 55 56 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>"); 57 MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) IB driver"); 58 MODULE_LICENSE("Dual BSD/GPL"); 59 60 struct mlx5_ib_event_work { 61 struct work_struct work; 62 union { 63 struct mlx5_ib_dev *dev; 64 struct mlx5_ib_multiport_info *mpi; 65 }; 66 bool is_slave; 67 unsigned int event; 68 void *param; 69 }; 70 71 enum { 72 MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, 73 }; 74 75 static struct workqueue_struct *mlx5_ib_event_wq; 76 static LIST_HEAD(mlx5_ib_unaffiliated_port_list); 77 static LIST_HEAD(mlx5_ib_dev_list); 78 /* 79 * This mutex should be held when accessing either of the above lists 80 */ 81 static DEFINE_MUTEX(mlx5_ib_multiport_mutex); 82 83 struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) 84 { 85 struct mlx5_ib_dev *dev; 86 87 mutex_lock(&mlx5_ib_multiport_mutex); 88 dev = mpi->ibdev; 89 mutex_unlock(&mlx5_ib_multiport_mutex); 90 return dev; 91 } 92 93 static enum rdma_link_layer 94 mlx5_port_type_cap_to_rdma_ll(int port_type_cap) 95 { 96 switch (port_type_cap) { 97 case MLX5_CAP_PORT_TYPE_IB: 98 return IB_LINK_LAYER_INFINIBAND; 99 case MLX5_CAP_PORT_TYPE_ETH: 100 return IB_LINK_LAYER_ETHERNET; 101 default: 102 return IB_LINK_LAYER_UNSPECIFIED; 103 } 104 } 105 106 static enum rdma_link_layer 107 mlx5_ib_port_link_layer(struct ib_device *device, u32 port_num) 108 { 109 struct mlx5_ib_dev *dev = to_mdev(device); 110 int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); 111 112 return mlx5_port_type_cap_to_rdma_ll(port_type_cap); 113 } 114 115 static int get_port_state(struct ib_device *ibdev, 116 u32 port_num, 117 enum ib_port_state *state) 118 { 119 struct ib_port_attr attr; 120 int ret; 121 122 memset(&attr, 0, sizeof(attr)); 123 ret = ibdev->ops.query_port(ibdev, port_num, &attr); 124 if (!ret) 125 *state = attr.state; 126 return ret; 127 } 128 129 static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, 130 struct net_device 
						   *ndev,
					   struct net_device *upper,
					   u32 *port_num)
{
	struct net_device *rep_ndev;
	struct mlx5_ib_port *port;
	int i;

	for (i = 0; i < dev->num_ports; i++) {
		port = &dev->port[i];
		if (!port->rep)
			continue;

		if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) {
			*port_num = i + 1;
			return &port->roce;
		}

		if (upper && port->rep->vport == MLX5_VPORT_UPLINK)
			continue;
		rep_ndev = ib_device_get_netdev(&dev->ib_dev, i + 1);
		if (rep_ndev && rep_ndev == ndev) {
			dev_put(rep_ndev);
			*port_num = i + 1;
			return &port->roce;
		}

		dev_put(rep_ndev);
	}

	return NULL;
}

static bool mlx5_netdev_send_event(struct mlx5_ib_dev *dev,
				   struct net_device *ndev,
				   struct net_device *upper,
				   struct net_device *ib_ndev)
{
	if (!dev->ib_active)
		return false;

	/* Event is about our upper device */
	if (upper == ndev)
		return true;

	/* RDMA device is not in lag and not in switchdev */
	if (!dev->is_rep && !upper && ndev == ib_ndev)
		return true;

	/* RDMA device is in switchdev */
	if (dev->is_rep && ndev == ib_ndev)
		return true;

	return false;
}

static struct net_device *mlx5_ib_get_rep_uplink_netdev(struct mlx5_ib_dev *ibdev)
{
	struct mlx5_ib_port *port;
	int i;

	for (i = 0; i < ibdev->num_ports; i++) {
		port = &ibdev->port[i];
		if (port->rep && port->rep->vport == MLX5_VPORT_UPLINK) {
			return ib_device_get_netdev(&ibdev->ib_dev, i + 1);
		}
	}

	return NULL;
}

static int mlx5_netdev_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	u32 port_num = roce->native_port_num;
	struct net_device *ib_ndev = NULL;
	struct mlx5_core_dev *mdev;
	struct mlx5_ib_dev *ibdev;

	ibdev = roce->dev;
	mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
	if (!mdev)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_REGISTER:
		/* Should already be registered during the load */
		if (ibdev->is_rep)
			break;

		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
		/* Exit if already registered */
		if (ib_ndev)
			goto put_ndev;

		if (ndev->dev.parent == mdev->device)
			ib_device_set_netdev(&ibdev->ib_dev, ndev, port_num);
		break;

	case NETDEV_UNREGISTER:
		/* In case of reps, ib device goes away before the netdevs */
		if (ibdev->is_rep)
			break;
		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);
		if (ib_ndev == ndev)
			ib_device_set_netdev(&ibdev->ib_dev, NULL, port_num);
		goto put_ndev;

	case NETDEV_CHANGE:
	case NETDEV_UP:
	case NETDEV_DOWN: {
		struct net_device *upper = NULL;

		if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) {
			struct net_device *lag_ndev;

			if (mlx5_lag_is_roce(mdev))
				lag_ndev = ib_device_get_netdev(&ibdev->ib_dev, 1);
			else /* sriov lag */
				lag_ndev = mlx5_ib_get_rep_uplink_netdev(ibdev);

			if (lag_ndev) {
				upper = netdev_master_upper_dev_get(lag_ndev);
				dev_put(lag_ndev);
			} else {
				goto done;
			}
		}

		if (ibdev->is_rep)
			roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num);
		if (!roce)
			return NOTIFY_DONE;

		ib_ndev = ib_device_get_netdev(&ibdev->ib_dev, port_num);

		if (mlx5_netdev_send_event(ibdev,
ndev, upper, ib_ndev)) { 269 struct ib_event ibev = { }; 270 enum ib_port_state port_state; 271 272 if (get_port_state(&ibdev->ib_dev, port_num, 273 &port_state)) 274 goto put_ndev; 275 276 if (roce->last_port_state == port_state) 277 goto put_ndev; 278 279 roce->last_port_state = port_state; 280 ibev.device = &ibdev->ib_dev; 281 if (port_state == IB_PORT_DOWN) 282 ibev.event = IB_EVENT_PORT_ERR; 283 else if (port_state == IB_PORT_ACTIVE) 284 ibev.event = IB_EVENT_PORT_ACTIVE; 285 else 286 goto put_ndev; 287 288 ibev.element.port_num = port_num; 289 ib_dispatch_event(&ibev); 290 } 291 break; 292 } 293 294 default: 295 break; 296 } 297 put_ndev: 298 dev_put(ib_ndev); 299 done: 300 mlx5_ib_put_native_port_mdev(ibdev, port_num); 301 return NOTIFY_DONE; 302 } 303 304 struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, 305 u32 ib_port_num, 306 u32 *native_port_num) 307 { 308 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, 309 ib_port_num); 310 struct mlx5_core_dev *mdev = NULL; 311 struct mlx5_ib_multiport_info *mpi; 312 struct mlx5_ib_port *port; 313 314 if (ibdev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) { 315 if (native_port_num) 316 *native_port_num = smi_to_native_portnum(ibdev, 317 ib_port_num); 318 return ibdev->mdev; 319 320 } 321 322 if (!mlx5_core_mp_enabled(ibdev->mdev) || 323 ll != IB_LINK_LAYER_ETHERNET) { 324 if (native_port_num) 325 *native_port_num = ib_port_num; 326 return ibdev->mdev; 327 } 328 329 if (native_port_num) 330 *native_port_num = 1; 331 332 port = &ibdev->port[ib_port_num - 1]; 333 spin_lock(&port->mp.mpi_lock); 334 mpi = ibdev->port[ib_port_num - 1].mp.mpi; 335 if (mpi && !mpi->unaffiliate) { 336 mdev = mpi->mdev; 337 /* If it's the master no need to refcount, it'll exist 338 * as long as the ib_dev exists. 
339 */ 340 if (!mpi->is_master) 341 mpi->mdev_refcnt++; 342 } 343 spin_unlock(&port->mp.mpi_lock); 344 345 return mdev; 346 } 347 348 void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u32 port_num) 349 { 350 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, 351 port_num); 352 struct mlx5_ib_multiport_info *mpi; 353 struct mlx5_ib_port *port; 354 355 if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET) 356 return; 357 358 port = &ibdev->port[port_num - 1]; 359 360 spin_lock(&port->mp.mpi_lock); 361 mpi = ibdev->port[port_num - 1].mp.mpi; 362 if (mpi->is_master) 363 goto out; 364 365 mpi->mdev_refcnt--; 366 if (mpi->unaffiliate) 367 complete(&mpi->unref_comp); 368 out: 369 spin_unlock(&port->mp.mpi_lock); 370 } 371 372 static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, 373 u16 *active_speed, u8 *active_width) 374 { 375 switch (eth_proto_oper) { 376 case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII): 377 case MLX5E_PROT_MASK(MLX5E_1000BASE_KX): 378 case MLX5E_PROT_MASK(MLX5E_100BASE_TX): 379 case MLX5E_PROT_MASK(MLX5E_1000BASE_T): 380 *active_width = IB_WIDTH_1X; 381 *active_speed = IB_SPEED_SDR; 382 break; 383 case MLX5E_PROT_MASK(MLX5E_10GBASE_T): 384 case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4): 385 case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4): 386 case MLX5E_PROT_MASK(MLX5E_10GBASE_KR): 387 case MLX5E_PROT_MASK(MLX5E_10GBASE_CR): 388 case MLX5E_PROT_MASK(MLX5E_10GBASE_SR): 389 case MLX5E_PROT_MASK(MLX5E_10GBASE_ER): 390 *active_width = IB_WIDTH_1X; 391 *active_speed = IB_SPEED_QDR; 392 break; 393 case MLX5E_PROT_MASK(MLX5E_25GBASE_CR): 394 case MLX5E_PROT_MASK(MLX5E_25GBASE_KR): 395 case MLX5E_PROT_MASK(MLX5E_25GBASE_SR): 396 *active_width = IB_WIDTH_1X; 397 *active_speed = IB_SPEED_EDR; 398 break; 399 case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4): 400 case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4): 401 case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4): 402 case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4): 403 *active_width = IB_WIDTH_4X; 404 *active_speed = IB_SPEED_QDR; 405 break; 406 case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2): 407 case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2): 408 case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2): 409 *active_width = IB_WIDTH_1X; 410 *active_speed = IB_SPEED_HDR; 411 break; 412 case MLX5E_PROT_MASK(MLX5E_56GBASE_R4): 413 *active_width = IB_WIDTH_4X; 414 *active_speed = IB_SPEED_FDR; 415 break; 416 case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4): 417 case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4): 418 case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4): 419 case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4): 420 *active_width = IB_WIDTH_4X; 421 *active_speed = IB_SPEED_EDR; 422 break; 423 default: 424 return -EINVAL; 425 } 426 427 return 0; 428 } 429 430 static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u16 *active_speed, 431 u8 *active_width) 432 { 433 switch (eth_proto_oper) { 434 case MLX5E_PROT_MASK(MLX5E_SGMII_100M): 435 case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII): 436 *active_width = IB_WIDTH_1X; 437 *active_speed = IB_SPEED_SDR; 438 break; 439 case MLX5E_PROT_MASK(MLX5E_5GBASE_R): 440 *active_width = IB_WIDTH_1X; 441 *active_speed = IB_SPEED_DDR; 442 break; 443 case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1): 444 *active_width = IB_WIDTH_1X; 445 *active_speed = IB_SPEED_QDR; 446 break; 447 case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4): 448 *active_width = IB_WIDTH_4X; 449 *active_speed = IB_SPEED_QDR; 450 break; 451 case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR): 452 *active_width = IB_WIDTH_1X; 453 *active_speed = IB_SPEED_EDR; 454 break; 455 case 
MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2): 456 *active_width = IB_WIDTH_2X; 457 *active_speed = IB_SPEED_EDR; 458 break; 459 case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR): 460 *active_width = IB_WIDTH_1X; 461 *active_speed = IB_SPEED_HDR; 462 break; 463 case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4): 464 *active_width = IB_WIDTH_4X; 465 *active_speed = IB_SPEED_EDR; 466 break; 467 case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2): 468 *active_width = IB_WIDTH_2X; 469 *active_speed = IB_SPEED_HDR; 470 break; 471 case MLX5E_PROT_MASK(MLX5E_100GAUI_1_100GBASE_CR_KR): 472 *active_width = IB_WIDTH_1X; 473 *active_speed = IB_SPEED_NDR; 474 break; 475 case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4): 476 *active_width = IB_WIDTH_4X; 477 *active_speed = IB_SPEED_HDR; 478 break; 479 case MLX5E_PROT_MASK(MLX5E_200GAUI_2_200GBASE_CR2_KR2): 480 *active_width = IB_WIDTH_2X; 481 *active_speed = IB_SPEED_NDR; 482 break; 483 case MLX5E_PROT_MASK(MLX5E_400GAUI_8_400GBASE_CR8): 484 *active_width = IB_WIDTH_8X; 485 *active_speed = IB_SPEED_HDR; 486 break; 487 case MLX5E_PROT_MASK(MLX5E_400GAUI_4_400GBASE_CR4_KR4): 488 *active_width = IB_WIDTH_4X; 489 *active_speed = IB_SPEED_NDR; 490 break; 491 case MLX5E_PROT_MASK(MLX5E_800GAUI_8_800GBASE_CR8_KR8): 492 *active_width = IB_WIDTH_8X; 493 *active_speed = IB_SPEED_NDR; 494 break; 495 default: 496 return -EINVAL; 497 } 498 499 return 0; 500 } 501 502 static int translate_eth_proto_oper(u32 eth_proto_oper, u16 *active_speed, 503 u8 *active_width, bool ext) 504 { 505 return ext ? 506 translate_eth_ext_proto_oper(eth_proto_oper, active_speed, 507 active_width) : 508 translate_eth_legacy_proto_oper(eth_proto_oper, active_speed, 509 active_width); 510 } 511 512 static int mlx5_query_port_roce(struct ib_device *device, u32 port_num, 513 struct ib_port_attr *props) 514 { 515 struct mlx5_ib_dev *dev = to_mdev(device); 516 u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; 517 struct mlx5_core_dev *mdev; 518 struct net_device *ndev, *upper; 519 enum ib_mtu ndev_ib_mtu; 520 bool put_mdev = true; 521 u32 eth_prot_oper; 522 u32 mdev_port_num; 523 bool ext; 524 int err; 525 526 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); 527 if (!mdev) { 528 /* This means the port isn't affiliated yet. Get the 529 * info for the master port instead. 530 */ 531 put_mdev = false; 532 mdev = dev->mdev; 533 mdev_port_num = 1; 534 port_num = 1; 535 } 536 537 /* Possible bad flows are checked before filling out props so in case 538 * of an error it will still be zeroed out. 
539 * Use native port in case of reps 540 */ 541 if (dev->is_rep) 542 err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 543 1, 0); 544 else 545 err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 546 mdev_port_num, 0); 547 if (err) 548 goto out; 549 ext = !!MLX5_GET_ETH_PROTO(ptys_reg, out, true, eth_proto_capability); 550 eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper); 551 552 props->active_width = IB_WIDTH_4X; 553 props->active_speed = IB_SPEED_QDR; 554 555 translate_eth_proto_oper(eth_prot_oper, &props->active_speed, 556 &props->active_width, ext); 557 558 if (!dev->is_rep && dev->mdev->roce.roce_en) { 559 u16 qkey_viol_cntr; 560 561 props->port_cap_flags |= IB_PORT_CM_SUP; 562 props->ip_gids = true; 563 props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, 564 roce_address_table_size); 565 mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr); 566 props->qkey_viol_cntr = qkey_viol_cntr; 567 } 568 props->max_mtu = IB_MTU_4096; 569 props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); 570 props->pkey_tbl_len = 1; 571 props->state = IB_PORT_DOWN; 572 props->phys_state = IB_PORT_PHYS_STATE_DISABLED; 573 574 /* If this is a stub query for an unaffiliated port stop here */ 575 if (!put_mdev) 576 goto out; 577 578 ndev = ib_device_get_netdev(device, port_num); 579 if (!ndev) 580 goto out; 581 582 if (mlx5_lag_is_roce(mdev) || mlx5_lag_is_sriov(mdev)) { 583 rcu_read_lock(); 584 upper = netdev_master_upper_dev_get_rcu(ndev); 585 if (upper) { 586 dev_put(ndev); 587 ndev = upper; 588 dev_hold(ndev); 589 } 590 rcu_read_unlock(); 591 } 592 593 if (netif_running(ndev) && netif_carrier_ok(ndev)) { 594 props->state = IB_PORT_ACTIVE; 595 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP; 596 } 597 598 ndev_ib_mtu = iboe_get_mtu(ndev->mtu); 599 600 dev_put(ndev); 601 602 props->active_mtu = min(props->max_mtu, ndev_ib_mtu); 603 out: 604 if (put_mdev) 605 mlx5_ib_put_native_port_mdev(dev, port_num); 606 return err; 607 } 608 609 int set_roce_addr(struct mlx5_ib_dev *dev, u32 port_num, 610 unsigned int index, const union ib_gid *gid, 611 const struct ib_gid_attr *attr) 612 { 613 enum ib_gid_type gid_type; 614 u16 vlan_id = 0xffff; 615 u8 roce_version = 0; 616 u8 roce_l3_type = 0; 617 u8 mac[ETH_ALEN]; 618 int ret; 619 620 gid_type = attr->gid_type; 621 if (gid) { 622 ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]); 623 if (ret) 624 return ret; 625 } 626 627 switch (gid_type) { 628 case IB_GID_TYPE_ROCE: 629 roce_version = MLX5_ROCE_VERSION_1; 630 break; 631 case IB_GID_TYPE_ROCE_UDP_ENCAP: 632 roce_version = MLX5_ROCE_VERSION_2; 633 if (gid && ipv6_addr_v4mapped((void *)gid)) 634 roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4; 635 else 636 roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6; 637 break; 638 639 default: 640 mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type); 641 } 642 643 return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, 644 roce_l3_type, gid->raw, mac, 645 vlan_id < VLAN_CFI_MASK, vlan_id, 646 port_num); 647 } 648 649 static int mlx5_ib_add_gid(const struct ib_gid_attr *attr, 650 __always_unused void **context) 651 { 652 int ret; 653 654 ret = mlx5r_add_gid_macsec_operations(attr); 655 if (ret) 656 return ret; 657 658 return set_roce_addr(to_mdev(attr->device), attr->port_num, 659 attr->index, &attr->gid, attr); 660 } 661 662 static int mlx5_ib_del_gid(const struct ib_gid_attr *attr, 663 __always_unused void **context) 664 { 665 int ret; 666 667 ret = set_roce_addr(to_mdev(attr->device), attr->port_num, 668 attr->index, NULL, attr); 
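	/* The HW GID entry was just cleared via set_roce_addr(..., NULL, attr);
	 * the MACsec state for this GID is torn down below only if that
	 * succeeded, i.e. the reverse order of mlx5_ib_add_gid().
	 */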
669 if (ret) 670 return ret; 671 672 mlx5r_del_gid_macsec_operations(attr); 673 return 0; 674 } 675 676 __be16 mlx5_get_roce_udp_sport_min(const struct mlx5_ib_dev *dev, 677 const struct ib_gid_attr *attr) 678 { 679 if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) 680 return 0; 681 682 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); 683 } 684 685 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 686 { 687 if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) 688 return !MLX5_CAP_GEN(dev->mdev, ib_virt); 689 return 0; 690 } 691 692 enum { 693 MLX5_VPORT_ACCESS_METHOD_MAD, 694 MLX5_VPORT_ACCESS_METHOD_HCA, 695 MLX5_VPORT_ACCESS_METHOD_NIC, 696 }; 697 698 static int mlx5_get_vport_access_method(struct ib_device *ibdev) 699 { 700 if (mlx5_use_mad_ifc(to_mdev(ibdev))) 701 return MLX5_VPORT_ACCESS_METHOD_MAD; 702 703 if (mlx5_ib_port_link_layer(ibdev, 1) == 704 IB_LINK_LAYER_ETHERNET) 705 return MLX5_VPORT_ACCESS_METHOD_NIC; 706 707 return MLX5_VPORT_ACCESS_METHOD_HCA; 708 } 709 710 static void get_atomic_caps(struct mlx5_ib_dev *dev, 711 u8 atomic_size_qp, 712 struct ib_device_attr *props) 713 { 714 u8 tmp; 715 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); 716 u8 atomic_req_8B_endianness_mode = 717 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode); 718 719 /* Check if HW supports 8 bytes standard atomic operations and capable 720 * of host endianness respond 721 */ 722 tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; 723 if (((atomic_operations & tmp) == tmp) && 724 (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) && 725 (atomic_req_8B_endianness_mode)) { 726 props->atomic_cap = IB_ATOMIC_HCA; 727 } else { 728 props->atomic_cap = IB_ATOMIC_NONE; 729 } 730 } 731 732 static void get_atomic_caps_qp(struct mlx5_ib_dev *dev, 733 struct ib_device_attr *props) 734 { 735 u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); 736 737 get_atomic_caps(dev, atomic_size_qp, props); 738 } 739 740 static int mlx5_query_system_image_guid(struct ib_device *ibdev, 741 __be64 *sys_image_guid) 742 { 743 struct mlx5_ib_dev *dev = to_mdev(ibdev); 744 struct mlx5_core_dev *mdev = dev->mdev; 745 u64 tmp; 746 int err; 747 748 switch (mlx5_get_vport_access_method(ibdev)) { 749 case MLX5_VPORT_ACCESS_METHOD_MAD: 750 return mlx5_query_mad_ifc_system_image_guid(ibdev, 751 sys_image_guid); 752 753 case MLX5_VPORT_ACCESS_METHOD_HCA: 754 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); 755 break; 756 757 case MLX5_VPORT_ACCESS_METHOD_NIC: 758 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); 759 break; 760 761 default: 762 return -EINVAL; 763 } 764 765 if (!err) 766 *sys_image_guid = cpu_to_be64(tmp); 767 768 return err; 769 770 } 771 772 static int mlx5_query_max_pkeys(struct ib_device *ibdev, 773 u16 *max_pkeys) 774 { 775 struct mlx5_ib_dev *dev = to_mdev(ibdev); 776 struct mlx5_core_dev *mdev = dev->mdev; 777 778 switch (mlx5_get_vport_access_method(ibdev)) { 779 case MLX5_VPORT_ACCESS_METHOD_MAD: 780 return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys); 781 782 case MLX5_VPORT_ACCESS_METHOD_HCA: 783 case MLX5_VPORT_ACCESS_METHOD_NIC: 784 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, 785 pkey_table_size)); 786 return 0; 787 788 default: 789 return -EINVAL; 790 } 791 } 792 793 static int mlx5_query_vendor_id(struct ib_device *ibdev, 794 u32 *vendor_id) 795 { 796 struct mlx5_ib_dev *dev = to_mdev(ibdev); 797 798 switch (mlx5_get_vport_access_method(ibdev)) { 799 case MLX5_VPORT_ACCESS_METHOD_MAD: 800 return 
mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id); 801 802 case MLX5_VPORT_ACCESS_METHOD_HCA: 803 case MLX5_VPORT_ACCESS_METHOD_NIC: 804 return mlx5_core_query_vendor_id(dev->mdev, vendor_id); 805 806 default: 807 return -EINVAL; 808 } 809 } 810 811 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, 812 __be64 *node_guid) 813 { 814 u64 tmp; 815 int err; 816 817 switch (mlx5_get_vport_access_method(&dev->ib_dev)) { 818 case MLX5_VPORT_ACCESS_METHOD_MAD: 819 return mlx5_query_mad_ifc_node_guid(dev, node_guid); 820 821 case MLX5_VPORT_ACCESS_METHOD_HCA: 822 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); 823 break; 824 825 case MLX5_VPORT_ACCESS_METHOD_NIC: 826 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); 827 break; 828 829 default: 830 return -EINVAL; 831 } 832 833 if (!err) 834 *node_guid = cpu_to_be64(tmp); 835 836 return err; 837 } 838 839 struct mlx5_reg_node_desc { 840 u8 desc[IB_DEVICE_NODE_DESC_MAX]; 841 }; 842 843 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) 844 { 845 struct mlx5_reg_node_desc in; 846 847 if (mlx5_use_mad_ifc(dev)) 848 return mlx5_query_mad_ifc_node_desc(dev, node_desc); 849 850 memset(&in, 0, sizeof(in)); 851 852 return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc, 853 sizeof(struct mlx5_reg_node_desc), 854 MLX5_REG_NODE_DESC, 0, 0); 855 } 856 857 static void fill_esw_mgr_reg_c0(struct mlx5_core_dev *mdev, 858 struct mlx5_ib_query_device_resp *resp) 859 { 860 struct mlx5_eswitch *esw = mdev->priv.eswitch; 861 u16 vport = mlx5_eswitch_manager_vport(mdev); 862 863 resp->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match(esw, 864 vport); 865 resp->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); 866 } 867 868 static int mlx5_ib_query_device(struct ib_device *ibdev, 869 struct ib_device_attr *props, 870 struct ib_udata *uhw) 871 { 872 size_t uhw_outlen = (uhw) ? 
uhw->outlen : 0; 873 struct mlx5_ib_dev *dev = to_mdev(ibdev); 874 struct mlx5_core_dev *mdev = dev->mdev; 875 int err = -ENOMEM; 876 int max_sq_desc; 877 int max_rq_sg; 878 int max_sq_sg; 879 u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); 880 bool raw_support = !mlx5_core_mp_enabled(mdev); 881 struct mlx5_ib_query_device_resp resp = {}; 882 size_t resp_len; 883 u64 max_tso; 884 885 resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); 886 if (uhw_outlen && uhw_outlen < resp_len) 887 return -EINVAL; 888 889 resp.response_length = resp_len; 890 891 if (uhw && uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) 892 return -EINVAL; 893 894 memset(props, 0, sizeof(*props)); 895 err = mlx5_query_system_image_guid(ibdev, 896 &props->sys_image_guid); 897 if (err) 898 return err; 899 900 props->max_pkeys = dev->pkey_table_len; 901 902 err = mlx5_query_vendor_id(ibdev, &props->vendor_id); 903 if (err) 904 return err; 905 906 props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | 907 (fw_rev_min(dev->mdev) << 16) | 908 fw_rev_sub(dev->mdev); 909 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | 910 IB_DEVICE_PORT_ACTIVE_EVENT | 911 IB_DEVICE_SYS_IMAGE_GUID | 912 IB_DEVICE_RC_RNR_NAK_GEN; 913 914 if (MLX5_CAP_GEN(mdev, pkv)) 915 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; 916 if (MLX5_CAP_GEN(mdev, qkv)) 917 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; 918 if (MLX5_CAP_GEN(mdev, apm)) 919 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; 920 if (MLX5_CAP_GEN(mdev, xrc)) 921 props->device_cap_flags |= IB_DEVICE_XRC; 922 if (MLX5_CAP_GEN(mdev, imaicl)) { 923 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | 924 IB_DEVICE_MEM_WINDOW_TYPE_2B; 925 props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 926 /* We support 'Gappy' memory registration too */ 927 props->kernel_cap_flags |= IBK_SG_GAPS_REG; 928 } 929 /* IB_WR_REG_MR always requires changing the entity size with UMR */ 930 if (!MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled)) 931 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 932 if (MLX5_CAP_GEN(mdev, sho)) { 933 props->kernel_cap_flags |= IBK_INTEGRITY_HANDOVER; 934 /* At this stage no support for signature handover */ 935 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | 936 IB_PROT_T10DIF_TYPE_2 | 937 IB_PROT_T10DIF_TYPE_3; 938 props->sig_guard_cap = IB_GUARD_T10DIF_CRC | 939 IB_GUARD_T10DIF_CSUM; 940 } 941 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 942 props->kernel_cap_flags |= IBK_BLOCK_MULTICAST_LOOPBACK; 943 944 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) { 945 if (MLX5_CAP_ETH(mdev, csum_cap)) { 946 /* Legacy bit to support old userspace libraries */ 947 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 948 props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM; 949 } 950 951 if (MLX5_CAP_ETH(dev->mdev, vlan_cap)) 952 props->raw_packet_caps |= 953 IB_RAW_PACKET_CAP_CVLAN_STRIPPING; 954 955 if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) { 956 max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); 957 if (max_tso) { 958 resp.tso_caps.max_tso = 1 << max_tso; 959 resp.tso_caps.supported_qpts |= 960 1 << IB_QPT_RAW_PACKET; 961 resp.response_length += sizeof(resp.tso_caps); 962 } 963 } 964 965 if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) { 966 resp.rss_caps.rx_hash_function = 967 MLX5_RX_HASH_FUNC_TOEPLITZ; 968 resp.rss_caps.rx_hash_fields_mask = 969 MLX5_RX_HASH_SRC_IPV4 | 970 MLX5_RX_HASH_DST_IPV4 | 971 MLX5_RX_HASH_SRC_IPV6 | 972 MLX5_RX_HASH_DST_IPV6 | 973 MLX5_RX_HASH_SRC_PORT_TCP | 974 MLX5_RX_HASH_DST_PORT_TCP 
| 975 MLX5_RX_HASH_SRC_PORT_UDP | 976 MLX5_RX_HASH_DST_PORT_UDP | 977 MLX5_RX_HASH_INNER; 978 resp.response_length += sizeof(resp.rss_caps); 979 } 980 } else { 981 if (offsetofend(typeof(resp), tso_caps) <= uhw_outlen) 982 resp.response_length += sizeof(resp.tso_caps); 983 if (offsetofend(typeof(resp), rss_caps) <= uhw_outlen) 984 resp.response_length += sizeof(resp.rss_caps); 985 } 986 987 if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { 988 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 989 props->kernel_cap_flags |= IBK_UD_TSO; 990 } 991 992 if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) && 993 MLX5_CAP_GEN(dev->mdev, general_notification_event) && 994 raw_support) 995 props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP; 996 997 if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && 998 MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap)) 999 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 1000 1001 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 1002 MLX5_CAP_ETH(dev->mdev, scatter_fcs) && 1003 raw_support) { 1004 /* Legacy bit to support old userspace libraries */ 1005 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; 1006 props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS; 1007 } 1008 1009 if (MLX5_CAP_DEV_MEM(mdev, memic)) { 1010 props->max_dm_size = 1011 MLX5_CAP_DEV_MEM(mdev, max_memic_size); 1012 } 1013 1014 if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) 1015 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; 1016 1017 if (MLX5_CAP_GEN(mdev, end_pad)) 1018 props->device_cap_flags |= IB_DEVICE_PCI_WRITE_END_PADDING; 1019 1020 props->vendor_part_id = mdev->pdev->device; 1021 props->hw_ver = mdev->pdev->revision; 1022 1023 props->max_mr_size = ~0ull; 1024 props->page_size_cap = ~(min_page_size - 1); 1025 props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); 1026 props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); 1027 max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / 1028 sizeof(struct mlx5_wqe_data_seg); 1029 max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); 1030 max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - 1031 sizeof(struct mlx5_wqe_raddr_seg)) / 1032 sizeof(struct mlx5_wqe_data_seg); 1033 props->max_send_sge = max_sq_sg; 1034 props->max_recv_sge = max_rq_sg; 1035 props->max_sge_rd = MLX5_MAX_SGE_RD; 1036 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); 1037 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; 1038 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 1039 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); 1040 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); 1041 props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); 1042 props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); 1043 props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; 1044 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); 1045 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 1046 props->max_srq_sge = max_rq_sg - 1; 1047 props->max_fast_reg_page_list_len = 1048 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); 1049 props->max_pi_fast_reg_page_list_len = 1050 props->max_fast_reg_page_list_len / 2; 1051 props->max_sgl_rd = 1052 MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance); 1053 get_atomic_caps_qp(dev, props); 1054 props->masked_atomic_cap = IB_ATOMIC_NONE; 1055 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); 1056 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); 1057 props->max_total_mcast_qp_attach = 
props->max_mcast_qp_attach * 1058 props->max_mcast_grp; 1059 props->max_ah = INT_MAX; 1060 props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); 1061 props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; 1062 1063 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 1064 if (dev->odp_caps.general_caps & IB_ODP_SUPPORT) 1065 props->kernel_cap_flags |= IBK_ON_DEMAND_PAGING; 1066 props->odp_caps = dev->odp_caps; 1067 if (!uhw) { 1068 /* ODP for kernel QPs is not implemented for receive 1069 * WQEs and SRQ WQEs 1070 */ 1071 props->odp_caps.per_transport_caps.rc_odp_caps &= 1072 ~(IB_ODP_SUPPORT_READ | 1073 IB_ODP_SUPPORT_SRQ_RECV); 1074 props->odp_caps.per_transport_caps.uc_odp_caps &= 1075 ~(IB_ODP_SUPPORT_READ | 1076 IB_ODP_SUPPORT_SRQ_RECV); 1077 props->odp_caps.per_transport_caps.ud_odp_caps &= 1078 ~(IB_ODP_SUPPORT_READ | 1079 IB_ODP_SUPPORT_SRQ_RECV); 1080 props->odp_caps.per_transport_caps.xrc_odp_caps &= 1081 ~(IB_ODP_SUPPORT_READ | 1082 IB_ODP_SUPPORT_SRQ_RECV); 1083 } 1084 } 1085 1086 if (mlx5_core_is_vf(mdev)) 1087 props->kernel_cap_flags |= IBK_VIRTUAL_FUNCTION; 1088 1089 if (mlx5_ib_port_link_layer(ibdev, 1) == 1090 IB_LINK_LAYER_ETHERNET && raw_support) { 1091 props->rss_caps.max_rwq_indirection_tables = 1092 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt); 1093 props->rss_caps.max_rwq_indirection_table_size = 1094 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size); 1095 props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; 1096 props->max_wq_type_rq = 1097 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); 1098 } 1099 1100 if (MLX5_CAP_GEN(mdev, tag_matching)) { 1101 props->tm_caps.max_num_tags = 1102 (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1; 1103 props->tm_caps.max_ops = 1104 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); 1105 props->tm_caps.max_sge = MLX5_TM_MAX_SGE; 1106 } 1107 1108 if (MLX5_CAP_GEN(mdev, tag_matching) && 1109 MLX5_CAP_GEN(mdev, rndv_offload_rc)) { 1110 props->tm_caps.flags = IB_TM_CAP_RNDV_RC; 1111 props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE; 1112 } 1113 1114 if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) { 1115 props->cq_caps.max_cq_moderation_count = 1116 MLX5_MAX_CQ_COUNT; 1117 props->cq_caps.max_cq_moderation_period = 1118 MLX5_MAX_CQ_PERIOD; 1119 } 1120 1121 if (offsetofend(typeof(resp), cqe_comp_caps) <= uhw_outlen) { 1122 resp.response_length += sizeof(resp.cqe_comp_caps); 1123 1124 if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) { 1125 resp.cqe_comp_caps.max_num = 1126 MLX5_CAP_GEN(dev->mdev, 1127 cqe_compression_max_num); 1128 1129 resp.cqe_comp_caps.supported_format = 1130 MLX5_IB_CQE_RES_FORMAT_HASH | 1131 MLX5_IB_CQE_RES_FORMAT_CSUM; 1132 1133 if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index)) 1134 resp.cqe_comp_caps.supported_format |= 1135 MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX; 1136 } 1137 } 1138 1139 if (offsetofend(typeof(resp), packet_pacing_caps) <= uhw_outlen && 1140 raw_support) { 1141 if (MLX5_CAP_QOS(mdev, packet_pacing) && 1142 MLX5_CAP_GEN(mdev, qos)) { 1143 resp.packet_pacing_caps.qp_rate_limit_max = 1144 MLX5_CAP_QOS(mdev, packet_pacing_max_rate); 1145 resp.packet_pacing_caps.qp_rate_limit_min = 1146 MLX5_CAP_QOS(mdev, packet_pacing_min_rate); 1147 resp.packet_pacing_caps.supported_qpts |= 1148 1 << IB_QPT_RAW_PACKET; 1149 if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) && 1150 MLX5_CAP_QOS(mdev, packet_pacing_typical_size)) 1151 resp.packet_pacing_caps.cap_flags |= 1152 MLX5_IB_PP_SUPPORT_BURST; 1153 } 1154 resp.response_length += sizeof(resp.packet_pacing_caps); 1155 } 1156 1157 if (offsetofend(typeof(resp), 
mlx5_ib_support_multi_pkt_send_wqes) <= 1158 uhw_outlen) { 1159 if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe)) 1160 resp.mlx5_ib_support_multi_pkt_send_wqes = 1161 MLX5_IB_ALLOW_MPW; 1162 1163 if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)) 1164 resp.mlx5_ib_support_multi_pkt_send_wqes |= 1165 MLX5_IB_SUPPORT_EMPW; 1166 1167 resp.response_length += 1168 sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); 1169 } 1170 1171 if (offsetofend(typeof(resp), flags) <= uhw_outlen) { 1172 resp.response_length += sizeof(resp.flags); 1173 1174 if (MLX5_CAP_GEN(mdev, cqe_compression_128)) 1175 resp.flags |= 1176 MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP; 1177 1178 if (MLX5_CAP_GEN(mdev, cqe_128_always)) 1179 resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD; 1180 if (MLX5_CAP_GEN(mdev, qp_packet_based)) 1181 resp.flags |= 1182 MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE; 1183 1184 resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT; 1185 } 1186 1187 if (offsetofend(typeof(resp), sw_parsing_caps) <= uhw_outlen) { 1188 resp.response_length += sizeof(resp.sw_parsing_caps); 1189 if (MLX5_CAP_ETH(mdev, swp)) { 1190 resp.sw_parsing_caps.sw_parsing_offloads |= 1191 MLX5_IB_SW_PARSING; 1192 1193 if (MLX5_CAP_ETH(mdev, swp_csum)) 1194 resp.sw_parsing_caps.sw_parsing_offloads |= 1195 MLX5_IB_SW_PARSING_CSUM; 1196 1197 if (MLX5_CAP_ETH(mdev, swp_lso)) 1198 resp.sw_parsing_caps.sw_parsing_offloads |= 1199 MLX5_IB_SW_PARSING_LSO; 1200 1201 if (resp.sw_parsing_caps.sw_parsing_offloads) 1202 resp.sw_parsing_caps.supported_qpts = 1203 BIT(IB_QPT_RAW_PACKET); 1204 } 1205 } 1206 1207 if (offsetofend(typeof(resp), striding_rq_caps) <= uhw_outlen && 1208 raw_support) { 1209 resp.response_length += sizeof(resp.striding_rq_caps); 1210 if (MLX5_CAP_GEN(mdev, striding_rq)) { 1211 resp.striding_rq_caps.min_single_stride_log_num_of_bytes = 1212 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES; 1213 resp.striding_rq_caps.max_single_stride_log_num_of_bytes = 1214 MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES; 1215 if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range)) 1216 resp.striding_rq_caps 1217 .min_single_wqe_log_num_of_strides = 1218 MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES; 1219 else 1220 resp.striding_rq_caps 1221 .min_single_wqe_log_num_of_strides = 1222 MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES; 1223 resp.striding_rq_caps.max_single_wqe_log_num_of_strides = 1224 MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES; 1225 resp.striding_rq_caps.supported_qpts = 1226 BIT(IB_QPT_RAW_PACKET); 1227 } 1228 } 1229 1230 if (offsetofend(typeof(resp), tunnel_offloads_caps) <= uhw_outlen) { 1231 resp.response_length += sizeof(resp.tunnel_offloads_caps); 1232 if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan)) 1233 resp.tunnel_offloads_caps |= 1234 MLX5_IB_TUNNELED_OFFLOADS_VXLAN; 1235 if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx)) 1236 resp.tunnel_offloads_caps |= 1237 MLX5_IB_TUNNELED_OFFLOADS_GENEVE; 1238 if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) 1239 resp.tunnel_offloads_caps |= 1240 MLX5_IB_TUNNELED_OFFLOADS_GRE; 1241 if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre)) 1242 resp.tunnel_offloads_caps |= 1243 MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE; 1244 if (MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_udp)) 1245 resp.tunnel_offloads_caps |= 1246 MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP; 1247 } 1248 1249 if (offsetofend(typeof(resp), dci_streams_caps) <= uhw_outlen) { 1250 resp.response_length += sizeof(resp.dci_streams_caps); 1251 1252 resp.dci_streams_caps.max_log_num_concurent = 1253 MLX5_CAP_GEN(mdev, log_max_dci_stream_channels); 1254 1255 
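	/* Also a log2 value: the max number of errored DCI streams the HCA
	 * reports in its capabilities.
	 */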
resp.dci_streams_caps.max_log_num_errored = 1256 MLX5_CAP_GEN(mdev, log_max_dci_errored_streams); 1257 } 1258 1259 if (offsetofend(typeof(resp), reserved) <= uhw_outlen) 1260 resp.response_length += sizeof(resp.reserved); 1261 1262 if (offsetofend(typeof(resp), reg_c0) <= uhw_outlen) { 1263 struct mlx5_eswitch *esw = mdev->priv.eswitch; 1264 1265 resp.response_length += sizeof(resp.reg_c0); 1266 1267 if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS && 1268 mlx5_eswitch_vport_match_metadata_enabled(esw)) 1269 fill_esw_mgr_reg_c0(mdev, &resp); 1270 } 1271 1272 if (uhw_outlen) { 1273 err = ib_copy_to_udata(uhw, &resp, resp.response_length); 1274 1275 if (err) 1276 return err; 1277 } 1278 1279 return 0; 1280 } 1281 1282 static void translate_active_width(struct ib_device *ibdev, u16 active_width, 1283 u8 *ib_width) 1284 { 1285 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1286 1287 if (active_width & MLX5_PTYS_WIDTH_1X) 1288 *ib_width = IB_WIDTH_1X; 1289 else if (active_width & MLX5_PTYS_WIDTH_2X) 1290 *ib_width = IB_WIDTH_2X; 1291 else if (active_width & MLX5_PTYS_WIDTH_4X) 1292 *ib_width = IB_WIDTH_4X; 1293 else if (active_width & MLX5_PTYS_WIDTH_8X) 1294 *ib_width = IB_WIDTH_8X; 1295 else if (active_width & MLX5_PTYS_WIDTH_12X) 1296 *ib_width = IB_WIDTH_12X; 1297 else { 1298 mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n", 1299 active_width); 1300 *ib_width = IB_WIDTH_4X; 1301 } 1302 1303 return; 1304 } 1305 1306 static int mlx5_mtu_to_ib_mtu(int mtu) 1307 { 1308 switch (mtu) { 1309 case 256: return 1; 1310 case 512: return 2; 1311 case 1024: return 3; 1312 case 2048: return 4; 1313 case 4096: return 5; 1314 default: 1315 pr_warn("invalid mtu\n"); 1316 return -1; 1317 } 1318 } 1319 1320 enum ib_max_vl_num { 1321 __IB_MAX_VL_0 = 1, 1322 __IB_MAX_VL_0_1 = 2, 1323 __IB_MAX_VL_0_3 = 3, 1324 __IB_MAX_VL_0_7 = 4, 1325 __IB_MAX_VL_0_14 = 5, 1326 }; 1327 1328 enum mlx5_vl_hw_cap { 1329 MLX5_VL_HW_0 = 1, 1330 MLX5_VL_HW_0_1 = 2, 1331 MLX5_VL_HW_0_2 = 3, 1332 MLX5_VL_HW_0_3 = 4, 1333 MLX5_VL_HW_0_4 = 5, 1334 MLX5_VL_HW_0_5 = 6, 1335 MLX5_VL_HW_0_6 = 7, 1336 MLX5_VL_HW_0_7 = 8, 1337 MLX5_VL_HW_0_14 = 15 1338 }; 1339 1340 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, 1341 u8 *max_vl_num) 1342 { 1343 switch (vl_hw_cap) { 1344 case MLX5_VL_HW_0: 1345 *max_vl_num = __IB_MAX_VL_0; 1346 break; 1347 case MLX5_VL_HW_0_1: 1348 *max_vl_num = __IB_MAX_VL_0_1; 1349 break; 1350 case MLX5_VL_HW_0_3: 1351 *max_vl_num = __IB_MAX_VL_0_3; 1352 break; 1353 case MLX5_VL_HW_0_7: 1354 *max_vl_num = __IB_MAX_VL_0_7; 1355 break; 1356 case MLX5_VL_HW_0_14: 1357 *max_vl_num = __IB_MAX_VL_0_14; 1358 break; 1359 1360 default: 1361 return -EINVAL; 1362 } 1363 1364 return 0; 1365 } 1366 1367 static int mlx5_query_hca_port(struct ib_device *ibdev, u32 port, 1368 struct ib_port_attr *props) 1369 { 1370 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1371 struct mlx5_core_dev *mdev = dev->mdev; 1372 struct mlx5_hca_vport_context *rep; 1373 u8 vl_hw_cap, plane_index = 0; 1374 u16 max_mtu; 1375 u16 oper_mtu; 1376 int err; 1377 u16 ib_link_width_oper; 1378 1379 rep = kzalloc(sizeof(*rep), GFP_KERNEL); 1380 if (!rep) { 1381 err = -ENOMEM; 1382 goto out; 1383 } 1384 1385 /* props being zeroed by the caller, avoid zeroing it here */ 1386 1387 if (ibdev->type == RDMA_DEVICE_TYPE_SMI) { 1388 plane_index = port; 1389 port = smi_to_native_portnum(dev, port); 1390 } 1391 1392 err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); 1393 if (err) 1394 goto out; 1395 1396 props->lid = 
rep->lid; 1397 props->lmc = rep->lmc; 1398 props->sm_lid = rep->sm_lid; 1399 props->sm_sl = rep->sm_sl; 1400 props->state = rep->vport_state; 1401 props->phys_state = rep->port_physical_state; 1402 1403 props->port_cap_flags = rep->cap_mask1; 1404 if (dev->num_plane) { 1405 props->port_cap_flags |= IB_PORT_SM_DISABLED; 1406 props->port_cap_flags &= ~IB_PORT_SM; 1407 } else if (ibdev->type == RDMA_DEVICE_TYPE_SMI) 1408 props->port_cap_flags &= ~IB_PORT_CM_SUP; 1409 1410 props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); 1411 props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); 1412 props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); 1413 props->bad_pkey_cntr = rep->pkey_violation_counter; 1414 props->qkey_viol_cntr = rep->qkey_violation_counter; 1415 props->subnet_timeout = rep->subnet_timeout; 1416 props->init_type_reply = rep->init_type_reply; 1417 1418 if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP) 1419 props->port_cap_flags2 = rep->cap_mask2; 1420 1421 err = mlx5_query_ib_port_oper(mdev, &ib_link_width_oper, 1422 &props->active_speed, port, plane_index); 1423 if (err) 1424 goto out; 1425 1426 translate_active_width(ibdev, ib_link_width_oper, &props->active_width); 1427 1428 mlx5_query_port_max_mtu(mdev, &max_mtu, port); 1429 1430 props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); 1431 1432 mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); 1433 1434 props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu); 1435 1436 err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port); 1437 if (err) 1438 goto out; 1439 1440 err = translate_max_vl_num(ibdev, vl_hw_cap, 1441 &props->max_vl_num); 1442 out: 1443 kfree(rep); 1444 return err; 1445 } 1446 1447 int mlx5_ib_query_port(struct ib_device *ibdev, u32 port, 1448 struct ib_port_attr *props) 1449 { 1450 unsigned int count; 1451 int ret; 1452 1453 switch (mlx5_get_vport_access_method(ibdev)) { 1454 case MLX5_VPORT_ACCESS_METHOD_MAD: 1455 ret = mlx5_query_mad_ifc_port(ibdev, port, props); 1456 break; 1457 1458 case MLX5_VPORT_ACCESS_METHOD_HCA: 1459 ret = mlx5_query_hca_port(ibdev, port, props); 1460 break; 1461 1462 case MLX5_VPORT_ACCESS_METHOD_NIC: 1463 ret = mlx5_query_port_roce(ibdev, port, props); 1464 break; 1465 1466 default: 1467 ret = -EINVAL; 1468 } 1469 1470 if (!ret && props) { 1471 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1472 struct mlx5_core_dev *mdev; 1473 bool put_mdev = true; 1474 1475 mdev = mlx5_ib_get_native_port_mdev(dev, port, NULL); 1476 if (!mdev) { 1477 /* If the port isn't affiliated yet query the master. 1478 * The master and slave will have the same values. 1479 */ 1480 mdev = dev->mdev; 1481 port = 1; 1482 put_mdev = false; 1483 } 1484 count = mlx5_core_reserved_gids_count(mdev); 1485 if (put_mdev) 1486 mlx5_ib_put_native_port_mdev(dev, port); 1487 props->gid_tbl_len -= count; 1488 } 1489 return ret; 1490 } 1491 1492 static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u32 port, 1493 struct ib_port_attr *props) 1494 { 1495 return mlx5_query_port_roce(ibdev, port, props); 1496 } 1497 1498 static int mlx5_ib_rep_query_pkey(struct ib_device *ibdev, u32 port, u16 index, 1499 u16 *pkey) 1500 { 1501 /* Default special Pkey for representor device port as per the 1502 * IB specification 1.3 section 10.9.1.2. 
1503 */ 1504 *pkey = 0xffff; 1505 return 0; 1506 } 1507 1508 static int mlx5_ib_query_gid(struct ib_device *ibdev, u32 port, int index, 1509 union ib_gid *gid) 1510 { 1511 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1512 struct mlx5_core_dev *mdev = dev->mdev; 1513 1514 switch (mlx5_get_vport_access_method(ibdev)) { 1515 case MLX5_VPORT_ACCESS_METHOD_MAD: 1516 return mlx5_query_mad_ifc_gids(ibdev, port, index, gid); 1517 1518 case MLX5_VPORT_ACCESS_METHOD_HCA: 1519 return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid); 1520 1521 default: 1522 return -EINVAL; 1523 } 1524 1525 } 1526 1527 static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u32 port, 1528 u16 index, u16 *pkey) 1529 { 1530 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1531 struct mlx5_core_dev *mdev; 1532 bool put_mdev = true; 1533 u32 mdev_port_num; 1534 int err; 1535 1536 mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num); 1537 if (!mdev) { 1538 /* The port isn't affiliated yet, get the PKey from the master 1539 * port. For RoCE the PKey tables will be the same. 1540 */ 1541 put_mdev = false; 1542 mdev = dev->mdev; 1543 mdev_port_num = 1; 1544 } 1545 1546 err = mlx5_query_hca_vport_pkey(mdev, 0, mdev_port_num, 0, 1547 index, pkey); 1548 if (put_mdev) 1549 mlx5_ib_put_native_port_mdev(dev, port); 1550 1551 return err; 1552 } 1553 1554 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, 1555 u16 *pkey) 1556 { 1557 switch (mlx5_get_vport_access_method(ibdev)) { 1558 case MLX5_VPORT_ACCESS_METHOD_MAD: 1559 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey); 1560 1561 case MLX5_VPORT_ACCESS_METHOD_HCA: 1562 case MLX5_VPORT_ACCESS_METHOD_NIC: 1563 return mlx5_query_hca_nic_pkey(ibdev, port, index, pkey); 1564 default: 1565 return -EINVAL; 1566 } 1567 } 1568 1569 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, 1570 struct ib_device_modify *props) 1571 { 1572 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1573 struct mlx5_reg_node_desc in; 1574 struct mlx5_reg_node_desc out; 1575 int err; 1576 1577 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) 1578 return -EOPNOTSUPP; 1579 1580 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 1581 return 0; 1582 1583 /* 1584 * If possible, pass node desc to FW, so it can generate 1585 * a 144 trap. If cmd fails, just ignore. 
1586 */ 1587 memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX); 1588 err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, 1589 sizeof(out), MLX5_REG_NODE_DESC, 0, 1); 1590 if (err) 1591 return err; 1592 1593 memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); 1594 1595 return err; 1596 } 1597 1598 static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u32 port_num, u32 mask, 1599 u32 value) 1600 { 1601 struct mlx5_hca_vport_context ctx = {}; 1602 struct mlx5_core_dev *mdev; 1603 u32 mdev_port_num; 1604 int err; 1605 1606 mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num); 1607 if (!mdev) 1608 return -ENODEV; 1609 1610 err = mlx5_query_hca_vport_context(mdev, 0, mdev_port_num, 0, &ctx); 1611 if (err) 1612 goto out; 1613 1614 if (~ctx.cap_mask1_perm & mask) { 1615 mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n", 1616 mask, ctx.cap_mask1_perm); 1617 err = -EINVAL; 1618 goto out; 1619 } 1620 1621 ctx.cap_mask1 = value; 1622 ctx.cap_mask1_perm = mask; 1623 err = mlx5_core_modify_hca_vport_context(mdev, 0, mdev_port_num, 1624 0, &ctx); 1625 1626 out: 1627 mlx5_ib_put_native_port_mdev(dev, port_num); 1628 1629 return err; 1630 } 1631 1632 static int mlx5_ib_modify_port(struct ib_device *ibdev, u32 port, int mask, 1633 struct ib_port_modify *props) 1634 { 1635 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1636 struct ib_port_attr attr; 1637 u32 tmp; 1638 int err; 1639 u32 change_mask; 1640 u32 value; 1641 bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) == 1642 IB_LINK_LAYER_INFINIBAND); 1643 1644 /* CM layer calls ib_modify_port() regardless of the link layer. For 1645 * Ethernet ports, qkey violation and Port capabilities are meaningless. 1646 */ 1647 if (!is_ib) 1648 return 0; 1649 1650 if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) { 1651 change_mask = props->clr_port_cap_mask | props->set_port_cap_mask; 1652 value = ~props->clr_port_cap_mask | props->set_port_cap_mask; 1653 return set_port_caps_atomic(dev, port, change_mask, value); 1654 } 1655 1656 mutex_lock(&dev->cap_mask_mutex); 1657 1658 err = ib_query_port(ibdev, port, &attr); 1659 if (err) 1660 goto out; 1661 1662 tmp = (attr.port_cap_flags | props->set_port_cap_mask) & 1663 ~props->clr_port_cap_mask; 1664 1665 err = mlx5_set_port_caps(dev->mdev, port, tmp); 1666 1667 out: 1668 mutex_unlock(&dev->cap_mask_mutex); 1669 return err; 1670 } 1671 1672 static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps) 1673 { 1674 mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n", 1675 caps & MLX5_LIB_CAP_4K_UAR ? 
"y" : "n"); 1676 } 1677 1678 static u16 calc_dynamic_bfregs(int uars_per_sys_page) 1679 { 1680 /* Large page with non 4k uar support might limit the dynamic size */ 1681 if (uars_per_sys_page == 1 && PAGE_SIZE > 4096) 1682 return MLX5_MIN_DYN_BFREGS; 1683 1684 return MLX5_MAX_DYN_BFREGS; 1685 } 1686 1687 static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k, 1688 struct mlx5_ib_alloc_ucontext_req_v2 *req, 1689 struct mlx5_bfreg_info *bfregi) 1690 { 1691 int uars_per_sys_page; 1692 int bfregs_per_sys_page; 1693 int ref_bfregs = req->total_num_bfregs; 1694 1695 if (req->total_num_bfregs == 0) 1696 return -EINVAL; 1697 1698 BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE); 1699 BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE); 1700 1701 if (req->total_num_bfregs > MLX5_MAX_BFREGS) 1702 return -ENOMEM; 1703 1704 uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k); 1705 bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR; 1706 /* This holds the required static allocation asked by the user */ 1707 req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page); 1708 if (req->num_low_latency_bfregs > req->total_num_bfregs - 1) 1709 return -EINVAL; 1710 1711 bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page; 1712 bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page); 1713 bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs; 1714 bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page; 1715 1716 mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n", 1717 MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no", 1718 lib_uar_4k ? 
"yes" : "no", ref_bfregs, 1719 req->total_num_bfregs, bfregi->total_num_bfregs, 1720 bfregi->num_sys_pages); 1721 1722 return 0; 1723 } 1724 1725 static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) 1726 { 1727 struct mlx5_bfreg_info *bfregi; 1728 int err; 1729 int i; 1730 1731 bfregi = &context->bfregi; 1732 for (i = 0; i < bfregi->num_static_sys_pages; i++) { 1733 err = mlx5_cmd_uar_alloc(dev->mdev, &bfregi->sys_pages[i], 1734 context->devx_uid); 1735 if (err) 1736 goto error; 1737 1738 mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]); 1739 } 1740 1741 for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++) 1742 bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX; 1743 1744 return 0; 1745 1746 error: 1747 for (--i; i >= 0; i--) 1748 if (mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i], 1749 context->devx_uid)) 1750 mlx5_ib_warn(dev, "failed to free uar %d\n", i); 1751 1752 return err; 1753 } 1754 1755 static void deallocate_uars(struct mlx5_ib_dev *dev, 1756 struct mlx5_ib_ucontext *context) 1757 { 1758 struct mlx5_bfreg_info *bfregi; 1759 int i; 1760 1761 bfregi = &context->bfregi; 1762 for (i = 0; i < bfregi->num_sys_pages; i++) 1763 if (i < bfregi->num_static_sys_pages || 1764 bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) 1765 mlx5_cmd_uar_dealloc(dev->mdev, bfregi->sys_pages[i], 1766 context->devx_uid); 1767 } 1768 1769 int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) 1770 { 1771 int err = 0; 1772 1773 mutex_lock(&dev->lb.mutex); 1774 if (td) 1775 dev->lb.user_td++; 1776 if (qp) 1777 dev->lb.qps++; 1778 1779 if (dev->lb.user_td == 2 || 1780 dev->lb.qps == 1) { 1781 if (!dev->lb.enabled) { 1782 err = mlx5_nic_vport_update_local_lb(dev->mdev, true); 1783 dev->lb.enabled = true; 1784 } 1785 } 1786 1787 mutex_unlock(&dev->lb.mutex); 1788 1789 return err; 1790 } 1791 1792 void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp) 1793 { 1794 mutex_lock(&dev->lb.mutex); 1795 if (td) 1796 dev->lb.user_td--; 1797 if (qp) 1798 dev->lb.qps--; 1799 1800 if (dev->lb.user_td == 1 && 1801 dev->lb.qps == 0) { 1802 if (dev->lb.enabled) { 1803 mlx5_nic_vport_update_local_lb(dev->mdev, false); 1804 dev->lb.enabled = false; 1805 } 1806 } 1807 1808 mutex_unlock(&dev->lb.mutex); 1809 } 1810 1811 static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn, 1812 u16 uid) 1813 { 1814 int err; 1815 1816 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1817 return 0; 1818 1819 err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid); 1820 if (err) 1821 return err; 1822 1823 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || 1824 (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && 1825 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 1826 return err; 1827 1828 return mlx5_ib_enable_lb(dev, true, false); 1829 } 1830 1831 static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn, 1832 u16 uid) 1833 { 1834 if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1835 return; 1836 1837 mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid); 1838 1839 if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || 1840 (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && 1841 !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 1842 return; 1843 1844 mlx5_ib_disable_lb(dev, true, false); 1845 } 1846 1847 static int set_ucontext_resp(struct ib_ucontext *uctx, 1848 struct mlx5_ib_alloc_ucontext_resp *resp) 1849 { 1850 struct ib_device *ibdev = uctx->device; 1851 struct 
mlx5_ib_dev *dev = to_mdev(ibdev); 1852 struct mlx5_ib_ucontext *context = to_mucontext(uctx); 1853 struct mlx5_bfreg_info *bfregi = &context->bfregi; 1854 1855 if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) { 1856 resp->dump_fill_mkey = dev->mkeys.dump_fill_mkey; 1857 resp->comp_mask |= 1858 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY; 1859 } 1860 1861 resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); 1862 if (mlx5_wc_support_get(dev->mdev)) 1863 resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, 1864 log_bf_reg_size); 1865 resp->cache_line_size = cache_line_size(); 1866 resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); 1867 resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); 1868 resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1869 resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1870 resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 1871 resp->cqe_version = context->cqe_version; 1872 resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 1873 MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; 1874 resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 1875 MLX5_CAP_GEN(dev->mdev, 1876 num_of_uars_per_page) : 1; 1877 resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 : 1878 bfregi->total_num_bfregs - bfregi->num_dyn_bfregs; 1879 resp->num_ports = dev->num_ports; 1880 resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | 1881 MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; 1882 1883 if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { 1884 mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline); 1885 resp->eth_min_inline++; 1886 } 1887 1888 if (dev->mdev->clock_info) 1889 resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1); 1890 1891 /* 1892 * We don't want to expose information from the PCI bar that is located 1893 * after 4096 bytes, so if the arch only supports larger pages, let's 1894 * pretend we don't support reading the HCA's core clock. This is also 1895 * forced by mmap function. 
1896 */ 1897 if (PAGE_SIZE <= 4096) { 1898 resp->comp_mask |= 1899 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; 1900 resp->hca_core_clock_offset = 1901 offsetof(struct mlx5_init_seg, 1902 internal_timer_h) % PAGE_SIZE; 1903 } 1904 1905 if (MLX5_CAP_GEN(dev->mdev, ece_support)) 1906 resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE; 1907 1908 if (rt_supported(MLX5_CAP_GEN(dev->mdev, sq_ts_format)) && 1909 rt_supported(MLX5_CAP_GEN(dev->mdev, rq_ts_format)) && 1910 rt_supported(MLX5_CAP_ROCE(dev->mdev, qp_ts_format))) 1911 resp->comp_mask |= 1912 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS; 1913 1914 resp->num_dyn_bfregs = bfregi->num_dyn_bfregs; 1915 1916 if (MLX5_CAP_GEN(dev->mdev, drain_sigerr)) 1917 resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS; 1918 1919 resp->comp_mask |= 1920 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG; 1921 1922 return 0; 1923 } 1924 1925 static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx, 1926 struct ib_udata *udata) 1927 { 1928 struct ib_device *ibdev = uctx->device; 1929 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1930 struct mlx5_ib_alloc_ucontext_req_v2 req = {}; 1931 struct mlx5_ib_alloc_ucontext_resp resp = {}; 1932 struct mlx5_ib_ucontext *context = to_mucontext(uctx); 1933 struct mlx5_bfreg_info *bfregi; 1934 int ver; 1935 int err; 1936 size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, 1937 max_cqe_version); 1938 bool lib_uar_4k; 1939 bool lib_uar_dyn; 1940 1941 if (!dev->ib_active) 1942 return -EAGAIN; 1943 1944 if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) 1945 ver = 0; 1946 else if (udata->inlen >= min_req_v2) 1947 ver = 2; 1948 else 1949 return -EINVAL; 1950 1951 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 1952 if (err) 1953 return err; 1954 1955 if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX) 1956 return -EOPNOTSUPP; 1957 1958 if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) 1959 return -EOPNOTSUPP; 1960 1961 req.total_num_bfregs = ALIGN(req.total_num_bfregs, 1962 MLX5_NON_FP_BFREGS_PER_UAR); 1963 if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) 1964 return -EINVAL; 1965 1966 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) { 1967 err = mlx5_ib_devx_create(dev, true); 1968 if (err < 0) 1969 goto out_ctx; 1970 context->devx_uid = err; 1971 } 1972 1973 lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; 1974 lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR; 1975 bfregi = &context->bfregi; 1976 1977 if (lib_uar_dyn) { 1978 bfregi->lib_uar_dyn = lib_uar_dyn; 1979 goto uar_done; 1980 } 1981 1982 /* updates req->total_num_bfregs */ 1983 err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi); 1984 if (err) 1985 goto out_devx; 1986 1987 mutex_init(&bfregi->lock); 1988 bfregi->lib_uar_4k = lib_uar_4k; 1989 bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count), 1990 GFP_KERNEL); 1991 if (!bfregi->count) { 1992 err = -ENOMEM; 1993 goto out_devx; 1994 } 1995 1996 bfregi->sys_pages = kcalloc(bfregi->num_sys_pages, 1997 sizeof(*bfregi->sys_pages), 1998 GFP_KERNEL); 1999 if (!bfregi->sys_pages) { 2000 err = -ENOMEM; 2001 goto out_count; 2002 } 2003 2004 err = allocate_uars(dev, context); 2005 if (err) 2006 goto out_sys_pages; 2007 2008 uar_done: 2009 err = mlx5_ib_alloc_transport_domain(dev, &context->tdn, 2010 context->devx_uid); 2011 if (err) 2012 goto out_uars; 2013 2014 INIT_LIST_HEAD(&context->db_page_list); 2015 mutex_init(&context->db_page_mutex); 2016 2017 context->cqe_version = min_t(__u8, 2018 (__u8)MLX5_CAP_GEN(dev->mdev, 
cqe_version), 2019 req.max_cqe_version); 2020 2021 err = set_ucontext_resp(uctx, &resp); 2022 if (err) 2023 goto out_mdev; 2024 2025 resp.response_length = min(udata->outlen, sizeof(resp)); 2026 err = ib_copy_to_udata(udata, &resp, resp.response_length); 2027 if (err) 2028 goto out_mdev; 2029 2030 bfregi->ver = ver; 2031 bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; 2032 context->lib_caps = req.lib_caps; 2033 print_lib_caps(dev, context->lib_caps); 2034 2035 if (mlx5_ib_lag_should_assign_affinity(dev)) { 2036 u32 port = mlx5_core_native_port_num(dev->mdev) - 1; 2037 2038 atomic_set(&context->tx_port_affinity, 2039 atomic_add_return( 2040 1, &dev->port[port].roce.tx_port_affinity)); 2041 } 2042 2043 return 0; 2044 2045 out_mdev: 2046 mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); 2047 2048 out_uars: 2049 deallocate_uars(dev, context); 2050 2051 out_sys_pages: 2052 kfree(bfregi->sys_pages); 2053 2054 out_count: 2055 kfree(bfregi->count); 2056 2057 out_devx: 2058 if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) 2059 mlx5_ib_devx_destroy(dev, context->devx_uid); 2060 2061 out_ctx: 2062 return err; 2063 } 2064 2065 static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext, 2066 struct uverbs_attr_bundle *attrs) 2067 { 2068 struct mlx5_ib_alloc_ucontext_resp uctx_resp = {}; 2069 int ret; 2070 2071 ret = set_ucontext_resp(ibcontext, &uctx_resp); 2072 if (ret) 2073 return ret; 2074 2075 uctx_resp.response_length = 2076 min_t(size_t, 2077 uverbs_attr_get_len(attrs, 2078 MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX), 2079 sizeof(uctx_resp)); 2080 2081 ret = uverbs_copy_to_struct_or_zero(attrs, 2082 MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, 2083 &uctx_resp, 2084 sizeof(uctx_resp)); 2085 return ret; 2086 } 2087 2088 static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) 2089 { 2090 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 2091 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 2092 struct mlx5_bfreg_info *bfregi; 2093 2094 bfregi = &context->bfregi; 2095 mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid); 2096 2097 deallocate_uars(dev, context); 2098 kfree(bfregi->sys_pages); 2099 kfree(bfregi->count); 2100 2101 if (context->devx_uid) 2102 mlx5_ib_devx_destroy(dev, context->devx_uid); 2103 } 2104 2105 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, 2106 int uar_idx) 2107 { 2108 int fw_uars_per_page; 2109 2110 fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1; 2111 2112 return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page; 2113 } 2114 2115 static u64 uar_index2paddress(struct mlx5_ib_dev *dev, 2116 int uar_idx) 2117 { 2118 unsigned int fw_uars_per_page; 2119 2120 fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 
2121 MLX5_UARS_IN_PAGE : 1; 2122 2123 return (dev->mdev->bar_addr + (uar_idx / fw_uars_per_page) * PAGE_SIZE); 2124 } 2125 2126 static int get_command(unsigned long offset) 2127 { 2128 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; 2129 } 2130 2131 static int get_arg(unsigned long offset) 2132 { 2133 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); 2134 } 2135 2136 static int get_index(unsigned long offset) 2137 { 2138 return get_arg(offset); 2139 } 2140 2141 /* Index resides in an extra byte to enable larger values than 255 */ 2142 static int get_extended_index(unsigned long offset) 2143 { 2144 return get_arg(offset) | ((offset >> 16) & 0xff) << 8; 2145 } 2146 2147 2148 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) 2149 { 2150 } 2151 2152 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) 2153 { 2154 switch (cmd) { 2155 case MLX5_IB_MMAP_WC_PAGE: 2156 return "WC"; 2157 case MLX5_IB_MMAP_REGULAR_PAGE: 2158 return "best effort WC"; 2159 case MLX5_IB_MMAP_NC_PAGE: 2160 return "NC"; 2161 case MLX5_IB_MMAP_DEVICE_MEM: 2162 return "Device Memory"; 2163 default: 2164 return "Unknown"; 2165 } 2166 } 2167 2168 static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev, 2169 struct vm_area_struct *vma, 2170 struct mlx5_ib_ucontext *context) 2171 { 2172 if ((vma->vm_end - vma->vm_start != PAGE_SIZE) || 2173 !(vma->vm_flags & VM_SHARED)) 2174 return -EINVAL; 2175 2176 if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1) 2177 return -EOPNOTSUPP; 2178 2179 if (vma->vm_flags & (VM_WRITE | VM_EXEC)) 2180 return -EPERM; 2181 vm_flags_clear(vma, VM_MAYWRITE); 2182 2183 if (!dev->mdev->clock_info) 2184 return -EOPNOTSUPP; 2185 2186 return vm_insert_page(vma, vma->vm_start, 2187 virt_to_page(dev->mdev->clock_info)); 2188 } 2189 2190 static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry) 2191 { 2192 struct mlx5_user_mmap_entry *mentry = to_mmmap(entry); 2193 struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device); 2194 struct mlx5_var_table *var_table = &dev->var_table; 2195 struct mlx5_ib_ucontext *context = to_mucontext(entry->ucontext); 2196 2197 switch (mentry->mmap_flag) { 2198 case MLX5_IB_MMAP_TYPE_MEMIC: 2199 case MLX5_IB_MMAP_TYPE_MEMIC_OP: 2200 mlx5_ib_dm_mmap_free(dev, mentry); 2201 break; 2202 case MLX5_IB_MMAP_TYPE_VAR: 2203 mutex_lock(&var_table->bitmap_lock); 2204 clear_bit(mentry->page_idx, var_table->bitmap); 2205 mutex_unlock(&var_table->bitmap_lock); 2206 kfree(mentry); 2207 break; 2208 case MLX5_IB_MMAP_TYPE_UAR_WC: 2209 case MLX5_IB_MMAP_TYPE_UAR_NC: 2210 mlx5_cmd_uar_dealloc(dev->mdev, mentry->page_idx, 2211 context->devx_uid); 2212 kfree(mentry); 2213 break; 2214 default: 2215 WARN_ON(true); 2216 } 2217 } 2218 2219 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, 2220 struct vm_area_struct *vma, 2221 struct mlx5_ib_ucontext *context) 2222 { 2223 struct mlx5_bfreg_info *bfregi = &context->bfregi; 2224 int err; 2225 unsigned long idx; 2226 phys_addr_t pfn; 2227 pgprot_t prot; 2228 u32 bfreg_dyn_idx = 0; 2229 u32 uar_index; 2230 int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC); 2231 int max_valid_idx = dyn_uar ? 
bfregi->num_sys_pages : 2232 bfregi->num_static_sys_pages; 2233 2234 if (bfregi->lib_uar_dyn) 2235 return -EINVAL; 2236 2237 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 2238 return -EINVAL; 2239 2240 if (dyn_uar) 2241 idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages; 2242 else 2243 idx = get_index(vma->vm_pgoff); 2244 2245 if (idx >= max_valid_idx) { 2246 mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n", 2247 idx, max_valid_idx); 2248 return -EINVAL; 2249 } 2250 2251 switch (cmd) { 2252 case MLX5_IB_MMAP_WC_PAGE: 2253 case MLX5_IB_MMAP_ALLOC_WC: 2254 case MLX5_IB_MMAP_REGULAR_PAGE: 2255 /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ 2256 prot = pgprot_writecombine(vma->vm_page_prot); 2257 break; 2258 case MLX5_IB_MMAP_NC_PAGE: 2259 prot = pgprot_noncached(vma->vm_page_prot); 2260 break; 2261 default: 2262 return -EINVAL; 2263 } 2264 2265 if (dyn_uar) { 2266 int uars_per_page; 2267 2268 uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k); 2269 bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR); 2270 if (bfreg_dyn_idx >= bfregi->total_num_bfregs) { 2271 mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n", 2272 bfreg_dyn_idx, bfregi->total_num_bfregs); 2273 return -EINVAL; 2274 } 2275 2276 mutex_lock(&bfregi->lock); 2277 /* Fail if uar already allocated, first bfreg index of each 2278 * page holds its count. 2279 */ 2280 if (bfregi->count[bfreg_dyn_idx]) { 2281 mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx); 2282 mutex_unlock(&bfregi->lock); 2283 return -EINVAL; 2284 } 2285 2286 bfregi->count[bfreg_dyn_idx]++; 2287 mutex_unlock(&bfregi->lock); 2288 2289 err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, 2290 context->devx_uid); 2291 if (err) { 2292 mlx5_ib_warn(dev, "UAR alloc failed\n"); 2293 goto free_bfreg; 2294 } 2295 } else { 2296 uar_index = bfregi->sys_pages[idx]; 2297 } 2298 2299 pfn = uar_index2pfn(dev, uar_index); 2300 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); 2301 2302 err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE, 2303 prot, NULL); 2304 if (err) { 2305 mlx5_ib_err(dev, 2306 "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n", 2307 err, mmap_cmd2str(cmd)); 2308 goto err; 2309 } 2310 2311 if (dyn_uar) 2312 bfregi->sys_pages[idx] = uar_index; 2313 return 0; 2314 2315 err: 2316 if (!dyn_uar) 2317 return err; 2318 2319 mlx5_cmd_uar_dealloc(dev->mdev, idx, context->devx_uid); 2320 2321 free_bfreg: 2322 mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx); 2323 2324 return err; 2325 } 2326 2327 static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma) 2328 { 2329 unsigned long idx; 2330 u8 command; 2331 2332 command = get_command(vma->vm_pgoff); 2333 idx = get_extended_index(vma->vm_pgoff); 2334 2335 return (command << 16 | idx); 2336 } 2337 2338 static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev, 2339 struct vm_area_struct *vma, 2340 struct ib_ucontext *ucontext) 2341 { 2342 struct mlx5_user_mmap_entry *mentry; 2343 struct rdma_user_mmap_entry *entry; 2344 unsigned long pgoff; 2345 pgprot_t prot; 2346 phys_addr_t pfn; 2347 int ret; 2348 2349 pgoff = mlx5_vma_to_pgoff(vma); 2350 entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff); 2351 if (!entry) 2352 return -EINVAL; 2353 2354 mentry = to_mmmap(entry); 2355 pfn = (mentry->address >> PAGE_SHIFT); 2356 if (mentry->mmap_flag == MLX5_IB_MMAP_TYPE_VAR || 2357 mentry->mmap_flag == MLX5_IB_MMAP_TYPE_UAR_NC) 2358 prot = pgprot_noncached(vma->vm_page_prot); 2359 else 2360 prot = 
pgprot_writecombine(vma->vm_page_prot); 2361 ret = rdma_user_mmap_io(ucontext, vma, pfn, 2362 entry->npages * PAGE_SIZE, 2363 prot, 2364 entry); 2365 rdma_user_mmap_entry_put(&mentry->rdma_entry); 2366 return ret; 2367 } 2368 2369 static u64 mlx5_entry_to_mmap_offset(struct mlx5_user_mmap_entry *entry) 2370 { 2371 u64 cmd = (entry->rdma_entry.start_pgoff >> 16) & 0xFFFF; 2372 u64 index = entry->rdma_entry.start_pgoff & 0xFFFF; 2373 2374 return (((index >> 8) << 16) | (cmd << MLX5_IB_MMAP_CMD_SHIFT) | 2375 (index & 0xFF)) << PAGE_SHIFT; 2376 } 2377 2378 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 2379 { 2380 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 2381 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 2382 unsigned long command; 2383 phys_addr_t pfn; 2384 2385 command = get_command(vma->vm_pgoff); 2386 switch (command) { 2387 case MLX5_IB_MMAP_WC_PAGE: 2388 case MLX5_IB_MMAP_ALLOC_WC: 2389 if (!mlx5_wc_support_get(dev->mdev)) 2390 return -EPERM; 2391 fallthrough; 2392 case MLX5_IB_MMAP_NC_PAGE: 2393 case MLX5_IB_MMAP_REGULAR_PAGE: 2394 return uar_mmap(dev, command, vma, context); 2395 2396 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 2397 return -ENOSYS; 2398 2399 case MLX5_IB_MMAP_CORE_CLOCK: 2400 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 2401 return -EINVAL; 2402 2403 if (vma->vm_flags & VM_WRITE) 2404 return -EPERM; 2405 vm_flags_clear(vma, VM_MAYWRITE); 2406 2407 /* Don't expose to user-space information it shouldn't have */ 2408 if (PAGE_SIZE > 4096) 2409 return -EOPNOTSUPP; 2410 2411 pfn = (dev->mdev->iseg_base + 2412 offsetof(struct mlx5_init_seg, internal_timer_h)) >> 2413 PAGE_SHIFT; 2414 return rdma_user_mmap_io(&context->ibucontext, vma, pfn, 2415 PAGE_SIZE, 2416 pgprot_noncached(vma->vm_page_prot), 2417 NULL); 2418 case MLX5_IB_MMAP_CLOCK_INFO: 2419 return mlx5_ib_mmap_clock_info_page(dev, vma, context); 2420 2421 default: 2422 return mlx5_ib_mmap_offset(dev, vma, ibcontext); 2423 } 2424 2425 return 0; 2426 } 2427 2428 static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) 2429 { 2430 struct mlx5_ib_pd *pd = to_mpd(ibpd); 2431 struct ib_device *ibdev = ibpd->device; 2432 struct mlx5_ib_alloc_pd_resp resp; 2433 int err; 2434 u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {}; 2435 u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {}; 2436 u16 uid = 0; 2437 struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context( 2438 udata, struct mlx5_ib_ucontext, ibucontext); 2439 2440 uid = context ? context->devx_uid : 0; 2441 MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); 2442 MLX5_SET(alloc_pd_in, in, uid, uid); 2443 err = mlx5_cmd_exec_inout(to_mdev(ibdev)->mdev, alloc_pd, in, out); 2444 if (err) 2445 return err; 2446 2447 pd->pdn = MLX5_GET(alloc_pd_out, out, pd); 2448 pd->uid = uid; 2449 if (udata) { 2450 resp.pdn = pd->pdn; 2451 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 2452 mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid); 2453 return -EFAULT; 2454 } 2455 } 2456 2457 return 0; 2458 } 2459 2460 static int mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata) 2461 { 2462 struct mlx5_ib_dev *mdev = to_mdev(pd->device); 2463 struct mlx5_ib_pd *mpd = to_mpd(pd); 2464 2465 return mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid); 2466 } 2467 2468 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 2469 { 2470 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2471 struct mlx5_ib_qp *mqp = to_mqp(ibqp); 2472 int err; 2473 u16 uid; 2474 2475 uid = ibqp->pd ? 
2476 to_mpd(ibqp->pd)->uid : 0; 2477 2478 if (mqp->flags & IB_QP_CREATE_SOURCE_QPN) { 2479 mlx5_ib_dbg(dev, "Attaching a multi cast group to underlay QP is not supported\n"); 2480 return -EOPNOTSUPP; 2481 } 2482 2483 err = mlx5_cmd_attach_mcg(dev->mdev, gid, ibqp->qp_num, uid); 2484 if (err) 2485 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", 2486 ibqp->qp_num, gid->raw); 2487 2488 return err; 2489 } 2490 2491 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 2492 { 2493 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2494 int err; 2495 u16 uid; 2496 2497 uid = ibqp->pd ? 2498 to_mpd(ibqp->pd)->uid : 0; 2499 err = mlx5_cmd_detach_mcg(dev->mdev, gid, ibqp->qp_num, uid); 2500 if (err) 2501 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", 2502 ibqp->qp_num, gid->raw); 2503 2504 return err; 2505 } 2506 2507 static int init_node_data(struct mlx5_ib_dev *dev) 2508 { 2509 int err; 2510 2511 err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); 2512 if (err) 2513 return err; 2514 2515 dev->mdev->rev_id = dev->mdev->pdev->revision; 2516 2517 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); 2518 } 2519 2520 static ssize_t fw_pages_show(struct device *device, 2521 struct device_attribute *attr, char *buf) 2522 { 2523 struct mlx5_ib_dev *dev = 2524 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2525 2526 return sysfs_emit(buf, "%d\n", dev->mdev->priv.fw_pages); 2527 } 2528 static DEVICE_ATTR_RO(fw_pages); 2529 2530 static ssize_t reg_pages_show(struct device *device, 2531 struct device_attribute *attr, char *buf) 2532 { 2533 struct mlx5_ib_dev *dev = 2534 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2535 2536 return sysfs_emit(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 2537 } 2538 static DEVICE_ATTR_RO(reg_pages); 2539 2540 static ssize_t hca_type_show(struct device *device, 2541 struct device_attribute *attr, char *buf) 2542 { 2543 struct mlx5_ib_dev *dev = 2544 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2545 2546 return sysfs_emit(buf, "MT%d\n", dev->mdev->pdev->device); 2547 } 2548 static DEVICE_ATTR_RO(hca_type); 2549 2550 static ssize_t hw_rev_show(struct device *device, 2551 struct device_attribute *attr, char *buf) 2552 { 2553 struct mlx5_ib_dev *dev = 2554 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2555 2556 return sysfs_emit(buf, "%x\n", dev->mdev->rev_id); 2557 } 2558 static DEVICE_ATTR_RO(hw_rev); 2559 2560 static ssize_t board_id_show(struct device *device, 2561 struct device_attribute *attr, char *buf) 2562 { 2563 struct mlx5_ib_dev *dev = 2564 rdma_device_to_drv_device(device, struct mlx5_ib_dev, ib_dev); 2565 2566 return sysfs_emit(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 2567 dev->mdev->board_id); 2568 } 2569 static DEVICE_ATTR_RO(board_id); 2570 2571 static struct attribute *mlx5_class_attributes[] = { 2572 &dev_attr_hw_rev.attr, 2573 &dev_attr_hca_type.attr, 2574 &dev_attr_board_id.attr, 2575 &dev_attr_fw_pages.attr, 2576 &dev_attr_reg_pages.attr, 2577 NULL, 2578 }; 2579 2580 static const struct attribute_group mlx5_attr_group = { 2581 .attrs = mlx5_class_attributes, 2582 }; 2583 2584 static void pkey_change_handler(struct work_struct *work) 2585 { 2586 struct mlx5_ib_port_resources *ports = 2587 container_of(work, struct mlx5_ib_port_resources, 2588 pkey_change_work); 2589 2590 if (!ports->gsi) 2591 /* 2592 * We got this event before device was fully configured 2593 * and MAD registration code wasn't called/finished yet. 
2594 */ 2595 return; 2596 2597 mlx5_ib_gsi_pkey_change(ports->gsi); 2598 } 2599 2600 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) 2601 { 2602 struct mlx5_ib_qp *mqp; 2603 struct mlx5_ib_cq *send_mcq, *recv_mcq; 2604 struct mlx5_core_cq *mcq; 2605 struct list_head cq_armed_list; 2606 unsigned long flags_qp; 2607 unsigned long flags_cq; 2608 unsigned long flags; 2609 2610 INIT_LIST_HEAD(&cq_armed_list); 2611 2612 /* Go over the qp list residing on this ibdev, syncing with QP create/destroy. */ 2613 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); 2614 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { 2615 spin_lock_irqsave(&mqp->sq.lock, flags_qp); 2616 if (mqp->sq.tail != mqp->sq.head) { 2617 send_mcq = to_mcq(mqp->ibqp.send_cq); 2618 spin_lock_irqsave(&send_mcq->lock, flags_cq); 2619 if (send_mcq->mcq.comp && 2620 mqp->ibqp.send_cq->comp_handler) { 2621 if (!send_mcq->mcq.reset_notify_added) { 2622 send_mcq->mcq.reset_notify_added = 1; 2623 list_add_tail(&send_mcq->mcq.reset_notify, 2624 &cq_armed_list); 2625 } 2626 } 2627 spin_unlock_irqrestore(&send_mcq->lock, flags_cq); 2628 } 2629 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); 2630 spin_lock_irqsave(&mqp->rq.lock, flags_qp); 2631 /* no handling is needed for SRQ */ 2632 if (!mqp->ibqp.srq) { 2633 if (mqp->rq.tail != mqp->rq.head) { 2634 recv_mcq = to_mcq(mqp->ibqp.recv_cq); 2635 spin_lock_irqsave(&recv_mcq->lock, flags_cq); 2636 if (recv_mcq->mcq.comp && 2637 mqp->ibqp.recv_cq->comp_handler) { 2638 if (!recv_mcq->mcq.reset_notify_added) { 2639 recv_mcq->mcq.reset_notify_added = 1; 2640 list_add_tail(&recv_mcq->mcq.reset_notify, 2641 &cq_armed_list); 2642 } 2643 } 2644 spin_unlock_irqrestore(&recv_mcq->lock, 2645 flags_cq); 2646 } 2647 } 2648 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); 2649 } 2650 /* At this point, any in-flight post-send work has been submitted for execution, since we 2651 * took and released the locks above. Now we need to arm all the involved CQs.
2652 */ 2653 list_for_each_entry(mcq, &cq_armed_list, reset_notify) { 2654 mcq->comp(mcq, NULL); 2655 } 2656 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); 2657 } 2658 2659 static void delay_drop_handler(struct work_struct *work) 2660 { 2661 int err; 2662 struct mlx5_ib_delay_drop *delay_drop = 2663 container_of(work, struct mlx5_ib_delay_drop, 2664 delay_drop_work); 2665 2666 atomic_inc(&delay_drop->events_cnt); 2667 2668 mutex_lock(&delay_drop->lock); 2669 err = mlx5_core_set_delay_drop(delay_drop->dev, delay_drop->timeout); 2670 if (err) { 2671 mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n", 2672 delay_drop->timeout); 2673 delay_drop->activate = false; 2674 } 2675 mutex_unlock(&delay_drop->lock); 2676 } 2677 2678 static void handle_general_event(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, 2679 struct ib_event *ibev) 2680 { 2681 u32 port = (eqe->data.port.port >> 4) & 0xf; 2682 2683 switch (eqe->sub_type) { 2684 case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT: 2685 if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == 2686 IB_LINK_LAYER_ETHERNET) 2687 schedule_work(&ibdev->delay_drop.delay_drop_work); 2688 break; 2689 default: /* do nothing */ 2690 return; 2691 } 2692 } 2693 2694 static int handle_port_change(struct mlx5_ib_dev *ibdev, struct mlx5_eqe *eqe, 2695 struct ib_event *ibev) 2696 { 2697 u32 port = (eqe->data.port.port >> 4) & 0xf; 2698 2699 ibev->element.port_num = port; 2700 2701 switch (eqe->sub_type) { 2702 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE: 2703 case MLX5_PORT_CHANGE_SUBTYPE_DOWN: 2704 case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED: 2705 /* In RoCE, port up/down events are handled in 2706 * mlx5_netdev_event(). 2707 */ 2708 if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == 2709 IB_LINK_LAYER_ETHERNET) 2710 return -EINVAL; 2711 2712 ibev->event = (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE) ? 
2713 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; 2714 break; 2715 2716 case MLX5_PORT_CHANGE_SUBTYPE_LID: 2717 ibev->event = IB_EVENT_LID_CHANGE; 2718 break; 2719 2720 case MLX5_PORT_CHANGE_SUBTYPE_PKEY: 2721 ibev->event = IB_EVENT_PKEY_CHANGE; 2722 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); 2723 break; 2724 2725 case MLX5_PORT_CHANGE_SUBTYPE_GUID: 2726 ibev->event = IB_EVENT_GID_CHANGE; 2727 break; 2728 2729 case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG: 2730 ibev->event = IB_EVENT_CLIENT_REREGISTER; 2731 break; 2732 default: 2733 return -EINVAL; 2734 } 2735 2736 return 0; 2737 } 2738 2739 static void mlx5_ib_handle_event(struct work_struct *_work) 2740 { 2741 struct mlx5_ib_event_work *work = 2742 container_of(_work, struct mlx5_ib_event_work, work); 2743 struct mlx5_ib_dev *ibdev; 2744 struct ib_event ibev; 2745 bool fatal = false; 2746 2747 if (work->is_slave) { 2748 ibdev = mlx5_ib_get_ibdev_from_mpi(work->mpi); 2749 if (!ibdev) 2750 goto out; 2751 } else { 2752 ibdev = work->dev; 2753 } 2754 2755 switch (work->event) { 2756 case MLX5_DEV_EVENT_SYS_ERROR: 2757 ibev.event = IB_EVENT_DEVICE_FATAL; 2758 mlx5_ib_handle_internal_error(ibdev); 2759 ibev.element.port_num = (u8)(unsigned long)work->param; 2760 fatal = true; 2761 break; 2762 case MLX5_EVENT_TYPE_PORT_CHANGE: 2763 if (handle_port_change(ibdev, work->param, &ibev)) 2764 goto out; 2765 break; 2766 case MLX5_EVENT_TYPE_GENERAL_EVENT: 2767 handle_general_event(ibdev, work->param, &ibev); 2768 fallthrough; 2769 default: 2770 goto out; 2771 } 2772 2773 ibev.device = &ibdev->ib_dev; 2774 2775 if (!rdma_is_port_valid(&ibdev->ib_dev, ibev.element.port_num)) { 2776 mlx5_ib_warn(ibdev, "warning: event on port %d\n", ibev.element.port_num); 2777 goto out; 2778 } 2779 2780 if (ibdev->ib_active) 2781 ib_dispatch_event(&ibev); 2782 2783 if (fatal) 2784 ibdev->ib_active = false; 2785 out: 2786 kfree(work); 2787 } 2788 2789 static int mlx5_ib_event(struct notifier_block *nb, 2790 unsigned long event, void *param) 2791 { 2792 struct mlx5_ib_event_work *work; 2793 2794 work = kmalloc(sizeof(*work), GFP_ATOMIC); 2795 if (!work) 2796 return NOTIFY_DONE; 2797 2798 INIT_WORK(&work->work, mlx5_ib_handle_event); 2799 work->dev = container_of(nb, struct mlx5_ib_dev, mdev_events); 2800 work->is_slave = false; 2801 work->param = param; 2802 work->event = event; 2803 2804 queue_work(mlx5_ib_event_wq, &work->work); 2805 2806 return NOTIFY_OK; 2807 } 2808 2809 static int mlx5_ib_event_slave_port(struct notifier_block *nb, 2810 unsigned long event, void *param) 2811 { 2812 struct mlx5_ib_event_work *work; 2813 2814 work = kmalloc(sizeof(*work), GFP_ATOMIC); 2815 if (!work) 2816 return NOTIFY_DONE; 2817 2818 INIT_WORK(&work->work, mlx5_ib_handle_event); 2819 work->mpi = container_of(nb, struct mlx5_ib_multiport_info, mdev_events); 2820 work->is_slave = true; 2821 work->param = param; 2822 work->event = event; 2823 queue_work(mlx5_ib_event_wq, &work->work); 2824 2825 return NOTIFY_OK; 2826 } 2827 2828 static int mlx5_ib_get_plane_num(struct mlx5_core_dev *mdev, u8 *num_plane) 2829 { 2830 struct mlx5_hca_vport_context vport_ctx; 2831 int err; 2832 2833 *num_plane = 0; 2834 if (!MLX5_CAP_GEN(mdev, ib_virt)) 2835 return 0; 2836 2837 err = mlx5_query_hca_vport_context(mdev, 0, 1, 0, &vport_ctx); 2838 if (err) 2839 return err; 2840 2841 *num_plane = vport_ctx.num_plane; 2842 return 0; 2843 } 2844 2845 static int set_has_smi_cap(struct mlx5_ib_dev *dev) 2846 { 2847 struct mlx5_hca_vport_context vport_ctx; 2848 int err; 2849 int port; 2850 2851 if 
(MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) 2852 return 0; 2853 2854 for (port = 1; port <= dev->num_ports; port++) { 2855 if (dev->num_plane) { 2856 dev->port_caps[port - 1].has_smi = false; 2857 continue; 2858 } else if (!MLX5_CAP_GEN(dev->mdev, ib_virt) || 2859 dev->ib_dev.type == RDMA_DEVICE_TYPE_SMI) { 2860 dev->port_caps[port - 1].has_smi = true; 2861 continue; 2862 } 2863 2864 err = mlx5_query_hca_vport_context(dev->mdev, 0, port, 0, 2865 &vport_ctx); 2866 if (err) { 2867 mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n", 2868 port, err); 2869 return err; 2870 } 2871 dev->port_caps[port - 1].has_smi = vport_ctx.has_smi; 2872 } 2873 2874 return 0; 2875 } 2876 2877 static void get_ext_port_caps(struct mlx5_ib_dev *dev) 2878 { 2879 unsigned int port; 2880 2881 rdma_for_each_port (&dev->ib_dev, port) 2882 mlx5_query_ext_port_caps(dev, port); 2883 } 2884 2885 static u8 mlx5_get_umr_fence(u8 umr_fence_cap) 2886 { 2887 switch (umr_fence_cap) { 2888 case MLX5_CAP_UMR_FENCE_NONE: 2889 return MLX5_FENCE_MODE_NONE; 2890 case MLX5_CAP_UMR_FENCE_SMALL: 2891 return MLX5_FENCE_MODE_INITIATOR_SMALL; 2892 default: 2893 return MLX5_FENCE_MODE_STRONG_ORDERING; 2894 } 2895 } 2896 2897 int mlx5_ib_dev_res_cq_init(struct mlx5_ib_dev *dev) 2898 { 2899 struct mlx5_ib_resources *devr = &dev->devr; 2900 struct ib_cq_init_attr cq_attr = {.cqe = 1}; 2901 struct ib_device *ibdev; 2902 struct ib_pd *pd; 2903 struct ib_cq *cq; 2904 int ret = 0; 2905 2906 2907 /* 2908 * devr->c0 is set once, never changed until device unload. 2909 * Avoid taking the mutex if initialization is already done. 2910 */ 2911 if (devr->c0) 2912 return 0; 2913 2914 mutex_lock(&devr->cq_lock); 2915 if (devr->c0) 2916 goto unlock; 2917 2918 ibdev = &dev->ib_dev; 2919 pd = ib_alloc_pd(ibdev, 0); 2920 if (IS_ERR(pd)) { 2921 ret = PTR_ERR(pd); 2922 mlx5_ib_err(dev, "Couldn't allocate PD for res init, err=%d\n", ret); 2923 goto unlock; 2924 } 2925 2926 cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); 2927 if (IS_ERR(cq)) { 2928 ret = PTR_ERR(cq); 2929 mlx5_ib_err(dev, "Couldn't create CQ for res init, err=%d\n", ret); 2930 ib_dealloc_pd(pd); 2931 goto unlock; 2932 } 2933 2934 devr->p0 = pd; 2935 devr->c0 = cq; 2936 2937 unlock: 2938 mutex_unlock(&devr->cq_lock); 2939 return ret; 2940 } 2941 2942 int mlx5_ib_dev_res_srq_init(struct mlx5_ib_dev *dev) 2943 { 2944 struct mlx5_ib_resources *devr = &dev->devr; 2945 struct ib_srq_init_attr attr; 2946 struct ib_srq *s0, *s1; 2947 int ret = 0; 2948 2949 /* 2950 * devr->s1 is set once, never changed until device unload. 2951 * Avoid taking the mutex if initialization is already done. 
2952 */ 2953 if (devr->s1) 2954 return 0; 2955 2956 mutex_lock(&devr->srq_lock); 2957 if (devr->s1) 2958 goto unlock; 2959 2960 ret = mlx5_ib_dev_res_cq_init(dev); 2961 if (ret) 2962 goto unlock; 2963 2964 memset(&attr, 0, sizeof(attr)); 2965 attr.attr.max_sge = 1; 2966 attr.attr.max_wr = 1; 2967 attr.srq_type = IB_SRQT_XRC; 2968 attr.ext.cq = devr->c0; 2969 2970 s0 = ib_create_srq(devr->p0, &attr); 2971 if (IS_ERR(s0)) { 2972 ret = PTR_ERR(s0); 2973 mlx5_ib_err(dev, "Couldn't create SRQ 0 for res init, err=%d\n", ret); 2974 goto unlock; 2975 } 2976 2977 memset(&attr, 0, sizeof(attr)); 2978 attr.attr.max_sge = 1; 2979 attr.attr.max_wr = 1; 2980 attr.srq_type = IB_SRQT_BASIC; 2981 2982 s1 = ib_create_srq(devr->p0, &attr); 2983 if (IS_ERR(s1)) { 2984 ret = PTR_ERR(s1); 2985 mlx5_ib_err(dev, "Couldn't create SRQ 1 for res init, err=%d\n", ret); 2986 ib_destroy_srq(s0); /* don't publish a destroyed s0 or an ERR_PTR s1 */ goto unlock; 2987 } 2988 2989 devr->s0 = s0; 2990 devr->s1 = s1; 2991 2992 unlock: 2993 mutex_unlock(&devr->srq_lock); 2994 return ret; 2995 } 2996 2997 static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev) 2998 { 2999 struct mlx5_ib_resources *devr = &dev->devr; 3000 int port; 3001 int ret; 3002 3003 if (!MLX5_CAP_GEN(dev->mdev, xrc)) 3004 return -EOPNOTSUPP; 3005 3006 ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0); 3007 if (ret) 3008 return ret; 3009 3010 ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0); 3011 if (ret) { 3012 mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); 3013 return ret; 3014 } 3015 3016 for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) 3017 INIT_WORK(&devr->ports[port].pkey_change_work, 3018 pkey_change_handler); 3019 3020 mutex_init(&devr->cq_lock); 3021 mutex_init(&devr->srq_lock); 3022 3023 return 0; 3024 } 3025 3026 static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev) 3027 { 3028 struct mlx5_ib_resources *devr = &dev->devr; 3029 int port; 3030 3031 /* 3032 * Make sure no P_Key change work items are still executing. 3033 * 3034 * At this stage, the mlx5_ib_event notifier should already be unregistered, 3035 * which ensures that no new work items are added. 3036 */ 3037 for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) 3038 cancel_work_sync(&devr->ports[port].pkey_change_work); 3039 3040 /* After s0/s1 init, they are not unset during the device lifetime. */ 3041 if (devr->s1) { 3042 ib_destroy_srq(devr->s1); 3043 ib_destroy_srq(devr->s0); 3044 } 3045 mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0); 3046 mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0); 3047 /* After p0/c0 init, they are not unset during the device lifetime.
*/ 3048 if (devr->c0) { 3049 ib_destroy_cq(devr->c0); 3050 ib_dealloc_pd(devr->p0); 3051 } 3052 mutex_destroy(&devr->cq_lock); 3053 mutex_destroy(&devr->srq_lock); 3054 } 3055 3056 static int 3057 mlx5_ib_create_data_direct_resources(struct mlx5_ib_dev *dev) 3058 { 3059 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 3060 struct mlx5_core_dev *mdev = dev->mdev; 3061 void *mkc; 3062 u32 mkey; 3063 u32 pdn; 3064 u32 *in; 3065 int err; 3066 3067 err = mlx5_core_alloc_pd(mdev, &pdn); 3068 if (err) 3069 return err; 3070 3071 in = kvzalloc(inlen, GFP_KERNEL); 3072 if (!in) { 3073 err = -ENOMEM; 3074 goto err; 3075 } 3076 3077 MLX5_SET(create_mkey_in, in, data_direct, 1); 3078 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 3079 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 3080 MLX5_SET(mkc, mkc, lw, 1); 3081 MLX5_SET(mkc, mkc, lr, 1); 3082 MLX5_SET(mkc, mkc, rw, 1); 3083 MLX5_SET(mkc, mkc, rr, 1); 3084 MLX5_SET(mkc, mkc, a, 1); 3085 MLX5_SET(mkc, mkc, pd, pdn); 3086 MLX5_SET(mkc, mkc, length64, 1); 3087 MLX5_SET(mkc, mkc, qpn, 0xffffff); 3088 err = mlx5_core_create_mkey(mdev, &mkey, in, inlen); 3089 kvfree(in); 3090 if (err) 3091 goto err; 3092 3093 dev->ddr.mkey = mkey; 3094 dev->ddr.pdn = pdn; 3095 return 0; 3096 3097 err: 3098 mlx5_core_dealloc_pd(mdev, pdn); 3099 return err; 3100 } 3101 3102 static void 3103 mlx5_ib_free_data_direct_resources(struct mlx5_ib_dev *dev) 3104 { 3105 mlx5_core_destroy_mkey(dev->mdev, dev->ddr.mkey); 3106 mlx5_core_dealloc_pd(dev->mdev, dev->ddr.pdn); 3107 } 3108 3109 static u32 get_core_cap_flags(struct ib_device *ibdev, 3110 struct mlx5_hca_vport_context *rep) 3111 { 3112 struct mlx5_ib_dev *dev = to_mdev(ibdev); 3113 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); 3114 u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type); 3115 u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version); 3116 bool raw_support = !mlx5_core_mp_enabled(dev->mdev); 3117 u32 ret = 0; 3118 3119 if (rep->grh_required) 3120 ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED; 3121 3122 if (dev->num_plane) 3123 return ret | RDMA_CORE_CAP_PROT_IB | RDMA_CORE_CAP_IB_MAD | 3124 RDMA_CORE_CAP_IB_CM | RDMA_CORE_CAP_IB_SA | 3125 RDMA_CORE_CAP_AF_IB; 3126 else if (ibdev->type == RDMA_DEVICE_TYPE_SMI) 3127 return ret | RDMA_CORE_CAP_IB_MAD | RDMA_CORE_CAP_IB_SMI; 3128 3129 if (ll == IB_LINK_LAYER_INFINIBAND) 3130 return ret | RDMA_CORE_PORT_IBA_IB; 3131 3132 if (raw_support) 3133 ret |= RDMA_CORE_PORT_RAW_PACKET; 3134 3135 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) 3136 return ret; 3137 3138 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) 3139 return ret; 3140 3141 if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) 3142 ret |= RDMA_CORE_PORT_IBA_ROCE; 3143 3144 if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP) 3145 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 3146 3147 return ret; 3148 } 3149 3150 static int mlx5_port_immutable(struct ib_device *ibdev, u32 port_num, 3151 struct ib_port_immutable *immutable) 3152 { 3153 struct ib_port_attr attr; 3154 struct mlx5_ib_dev *dev = to_mdev(ibdev); 3155 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); 3156 struct mlx5_hca_vport_context rep = {0}; 3157 int err; 3158 3159 err = ib_query_port(ibdev, port_num, &attr); 3160 if (err) 3161 return err; 3162 3163 if (ll == IB_LINK_LAYER_INFINIBAND) { 3164 if (ibdev->type == RDMA_DEVICE_TYPE_SMI) 3165 port_num = smi_to_native_portnum(dev, port_num); 3166 3167 err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0, 3168 &rep); 3169 if (err) 3170 return err; 3171 } 
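/* Note: for an Ethernet link layer, 'rep' above stays zero-initialized; get_core_cap_flags() then derives the RoCE/raw-packet capability bits from the RoCE caps and link layer rather than from a queried vport context. */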
3172 3173 immutable->pkey_tbl_len = attr.pkey_tbl_len; 3174 immutable->gid_tbl_len = attr.gid_tbl_len; 3175 immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep); 3176 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 3177 3178 return 0; 3179 } 3180 3181 static int mlx5_port_rep_immutable(struct ib_device *ibdev, u32 port_num, 3182 struct ib_port_immutable *immutable) 3183 { 3184 struct ib_port_attr attr; 3185 int err; 3186 3187 immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; 3188 3189 err = ib_query_port(ibdev, port_num, &attr); 3190 if (err) 3191 return err; 3192 3193 immutable->pkey_tbl_len = attr.pkey_tbl_len; 3194 immutable->gid_tbl_len = attr.gid_tbl_len; 3195 immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; 3196 3197 return 0; 3198 } 3199 3200 static void get_dev_fw_str(struct ib_device *ibdev, char *str) 3201 { 3202 struct mlx5_ib_dev *dev = 3203 container_of(ibdev, struct mlx5_ib_dev, ib_dev); 3204 snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%04d", 3205 fw_rev_maj(dev->mdev), fw_rev_min(dev->mdev), 3206 fw_rev_sub(dev->mdev)); 3207 } 3208 3209 static int lag_event(struct notifier_block *nb, unsigned long event, void *data) 3210 { 3211 struct mlx5_ib_dev *dev = container_of(nb, struct mlx5_ib_dev, 3212 lag_events); 3213 struct mlx5_core_dev *mdev = dev->mdev; 3214 struct mlx5_ib_port *port; 3215 struct net_device *ndev; 3216 int i, err; 3217 int portnum; 3218 3219 portnum = 0; 3220 switch (event) { 3221 case MLX5_DRIVER_EVENT_ACTIVE_BACKUP_LAG_CHANGE_LOWERSTATE: 3222 ndev = data; 3223 if (ndev) { 3224 if (!mlx5_lag_is_roce(mdev)) { 3225 // sriov lag 3226 for (i = 0; i < dev->num_ports; i++) { 3227 port = &dev->port[i]; 3228 if (port->rep && port->rep->vport == 3229 MLX5_VPORT_UPLINK) { 3230 portnum = i; 3231 break; 3232 } 3233 } 3234 } 3235 err = ib_device_set_netdev(&dev->ib_dev, ndev, 3236 portnum + 1); 3237 dev_put(ndev); 3238 if (err) 3239 return err; 3240 /* Rescan gids after new netdev assignment */ 3241 rdma_roce_rescan_device(&dev->ib_dev); 3242 } 3243 break; 3244 default: 3245 return NOTIFY_DONE; 3246 } 3247 return NOTIFY_OK; 3248 } 3249 3250 static void mlx5e_lag_event_register(struct mlx5_ib_dev *dev) 3251 { 3252 dev->lag_events.notifier_call = lag_event; 3253 blocking_notifier_chain_register(&dev->mdev->priv.lag_nh, 3254 &dev->lag_events); 3255 } 3256 3257 static void mlx5e_lag_event_unregister(struct mlx5_ib_dev *dev) 3258 { 3259 blocking_notifier_chain_unregister(&dev->mdev->priv.lag_nh, 3260 &dev->lag_events); 3261 } 3262 3263 static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) 3264 { 3265 struct mlx5_core_dev *mdev = dev->mdev; 3266 struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, 3267 MLX5_FLOW_NAMESPACE_LAG); 3268 struct mlx5_flow_table *ft; 3269 int err; 3270 3271 if (!ns || !mlx5_lag_is_active(mdev)) 3272 return 0; 3273 3274 err = mlx5_cmd_create_vport_lag(mdev); 3275 if (err) 3276 return err; 3277 3278 ft = mlx5_create_lag_demux_flow_table(ns, 0, 0); 3279 if (IS_ERR(ft)) { 3280 err = PTR_ERR(ft); 3281 goto err_destroy_vport_lag; 3282 } 3283 3284 mlx5e_lag_event_register(dev); 3285 dev->flow_db->lag_demux_ft = ft; 3286 dev->lag_ports = mlx5_lag_get_num_ports(mdev); 3287 dev->lag_active = true; 3288 return 0; 3289 3290 err_destroy_vport_lag: 3291 mlx5_cmd_destroy_vport_lag(mdev); 3292 return err; 3293 } 3294 3295 static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) 3296 { 3297 struct mlx5_core_dev *mdev = dev->mdev; 3298 3299 if (dev->lag_active) { 3300 dev->lag_active = false; 3301 3302 mlx5e_lag_event_unregister(dev); 3303 
mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft); 3304 dev->flow_db->lag_demux_ft = NULL; 3305 3306 mlx5_cmd_destroy_vport_lag(mdev); 3307 } 3308 } 3309 3310 static void mlx5_netdev_notifier_register(struct mlx5_roce *roce, 3311 struct net_device *netdev) 3312 { 3313 int err; 3314 3315 if (roce->tracking_netdev) 3316 return; 3317 roce->tracking_netdev = netdev; 3318 roce->nb.notifier_call = mlx5_netdev_event; 3319 err = register_netdevice_notifier_dev_net(netdev, &roce->nb, &roce->nn); 3320 WARN_ON(err); 3321 } 3322 3323 static void mlx5_netdev_notifier_unregister(struct mlx5_roce *roce) 3324 { 3325 if (!roce->tracking_netdev) 3326 return; 3327 unregister_netdevice_notifier_dev_net(roce->tracking_netdev, &roce->nb, 3328 &roce->nn); 3329 roce->tracking_netdev = NULL; 3330 } 3331 3332 static int mlx5e_mdev_notifier_event(struct notifier_block *nb, 3333 unsigned long event, void *data) 3334 { 3335 struct mlx5_roce *roce = container_of(nb, struct mlx5_roce, mdev_nb); 3336 struct net_device *netdev = data; 3337 3338 switch (event) { 3339 case MLX5_DRIVER_EVENT_UPLINK_NETDEV: 3340 if (netdev) 3341 mlx5_netdev_notifier_register(roce, netdev); 3342 else 3343 mlx5_netdev_notifier_unregister(roce); 3344 break; 3345 default: 3346 return NOTIFY_DONE; 3347 } 3348 3349 return NOTIFY_OK; 3350 } 3351 3352 static void mlx5_mdev_netdev_track(struct mlx5_ib_dev *dev, u32 port_num) 3353 { 3354 struct mlx5_roce *roce = &dev->port[port_num].roce; 3355 3356 roce->mdev_nb.notifier_call = mlx5e_mdev_notifier_event; 3357 mlx5_blocking_notifier_register(dev->mdev, &roce->mdev_nb); 3358 mlx5_core_uplink_netdev_event_replay(dev->mdev); 3359 } 3360 3361 static void mlx5_mdev_netdev_untrack(struct mlx5_ib_dev *dev, u32 port_num) 3362 { 3363 struct mlx5_roce *roce = &dev->port[port_num].roce; 3364 3365 mlx5_blocking_notifier_unregister(dev->mdev, &roce->mdev_nb); 3366 mlx5_netdev_notifier_unregister(roce); 3367 } 3368 3369 static int mlx5_enable_eth(struct mlx5_ib_dev *dev) 3370 { 3371 int err; 3372 3373 if (!dev->is_rep && dev->profile != &raw_eth_profile) { 3374 err = mlx5_nic_vport_enable_roce(dev->mdev); 3375 if (err) 3376 return err; 3377 } 3378 3379 err = mlx5_eth_lag_init(dev); 3380 if (err) 3381 goto err_disable_roce; 3382 3383 return 0; 3384 3385 err_disable_roce: 3386 if (!dev->is_rep && dev->profile != &raw_eth_profile) 3387 mlx5_nic_vport_disable_roce(dev->mdev); 3388 3389 return err; 3390 } 3391 3392 static void mlx5_disable_eth(struct mlx5_ib_dev *dev) 3393 { 3394 mlx5_eth_lag_cleanup(dev); 3395 if (!dev->is_rep && dev->profile != &raw_eth_profile) 3396 mlx5_nic_vport_disable_roce(dev->mdev); 3397 } 3398 3399 static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num, 3400 enum rdma_netdev_t type, 3401 struct rdma_netdev_alloc_params *params) 3402 { 3403 if (type != RDMA_NETDEV_IPOIB) 3404 return -EOPNOTSUPP; 3405 3406 return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params); 3407 } 3408 3409 static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf, 3410 size_t count, loff_t *pos) 3411 { 3412 struct mlx5_ib_delay_drop *delay_drop = filp->private_data; 3413 char lbuf[20]; 3414 int len; 3415 3416 len = snprintf(lbuf, sizeof(lbuf), "%u\n", delay_drop->timeout); 3417 return simple_read_from_buffer(buf, count, pos, lbuf, len); 3418 } 3419 3420 static ssize_t delay_drop_timeout_write(struct file *filp, const char __user *buf, 3421 size_t count, loff_t *pos) 3422 { 3423 struct mlx5_ib_delay_drop *delay_drop = filp->private_data; 3424 u32 timeout; 3425 u32 var; 3426 
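/* The value written by the user is treated as usec: it is rounded up to a 100 usec granularity and clamped to the maximum supported delay-drop timeout (MLX5_MAX_DELAY_DROP_TIMEOUT_MS expressed in usec). */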
3427 if (kstrtouint_from_user(buf, count, 0, &var)) 3428 return -EFAULT; 3429 3430 timeout = min_t(u32, roundup(var, 100), MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 3431 1000); 3432 if (timeout != var) 3433 mlx5_ib_dbg(delay_drop->dev, "Round delay drop timeout to %u usec\n", 3434 timeout); 3435 3436 delay_drop->timeout = timeout; 3437 3438 return count; 3439 } 3440 3441 static const struct file_operations fops_delay_drop_timeout = { 3442 .owner = THIS_MODULE, 3443 .open = simple_open, 3444 .write = delay_drop_timeout_write, 3445 .read = delay_drop_timeout_read, 3446 }; 3447 3448 static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, 3449 struct mlx5_ib_multiport_info *mpi) 3450 { 3451 u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; 3452 struct mlx5_ib_port *port = &ibdev->port[port_num]; 3453 int comps; 3454 int err; 3455 int i; 3456 3457 lockdep_assert_held(&mlx5_ib_multiport_mutex); 3458 3459 mlx5_core_mp_event_replay(ibdev->mdev, 3460 MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, 3461 NULL); 3462 mlx5_core_mp_event_replay(mpi->mdev, 3463 MLX5_DRIVER_EVENT_AFFILIATION_REMOVED, 3464 NULL); 3465 3466 mlx5_ib_cleanup_cong_debugfs(ibdev, port_num); 3467 3468 spin_lock(&port->mp.mpi_lock); 3469 if (!mpi->ibdev) { 3470 spin_unlock(&port->mp.mpi_lock); 3471 return; 3472 } 3473 3474 mpi->ibdev = NULL; 3475 3476 spin_unlock(&port->mp.mpi_lock); 3477 if (mpi->mdev_events.notifier_call) 3478 mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events); 3479 mpi->mdev_events.notifier_call = NULL; 3480 mlx5_mdev_netdev_untrack(ibdev, port_num); 3481 spin_lock(&port->mp.mpi_lock); 3482 3483 comps = mpi->mdev_refcnt; 3484 if (comps) { 3485 mpi->unaffiliate = true; 3486 init_completion(&mpi->unref_comp); 3487 spin_unlock(&port->mp.mpi_lock); 3488 3489 for (i = 0; i < comps; i++) 3490 wait_for_completion(&mpi->unref_comp); 3491 3492 spin_lock(&port->mp.mpi_lock); 3493 mpi->unaffiliate = false; 3494 } 3495 3496 port->mp.mpi = NULL; 3497 3498 spin_unlock(&port->mp.mpi_lock); 3499 3500 err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev); 3501 3502 mlx5_ib_dbg(ibdev, "unaffiliated port %u\n", port_num + 1); 3503 /* If unaffiliation failed, only log an error; we still need to clean up 3504 * the pointers and add the mpi back to the list.
3505 */ 3506 if (err) 3507 mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n", 3508 port_num + 1); 3509 3510 ibdev->port[port_num].roce.last_port_state = IB_PORT_DOWN; 3511 } 3512 3513 static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, 3514 struct mlx5_ib_multiport_info *mpi) 3515 { 3516 u32 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; 3517 u64 key; 3518 int err; 3519 3520 lockdep_assert_held(&mlx5_ib_multiport_mutex); 3521 3522 spin_lock(&ibdev->port[port_num].mp.mpi_lock); 3523 if (ibdev->port[port_num].mp.mpi) { 3524 mlx5_ib_dbg(ibdev, "port %u already affiliated.\n", 3525 port_num + 1); 3526 spin_unlock(&ibdev->port[port_num].mp.mpi_lock); 3527 return false; 3528 } 3529 3530 ibdev->port[port_num].mp.mpi = mpi; 3531 mpi->ibdev = ibdev; 3532 mpi->mdev_events.notifier_call = NULL; 3533 spin_unlock(&ibdev->port[port_num].mp.mpi_lock); 3534 3535 err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); 3536 if (err) 3537 goto unbind; 3538 3539 mlx5_mdev_netdev_track(ibdev, port_num); 3540 3541 mpi->mdev_events.notifier_call = mlx5_ib_event_slave_port; 3542 mlx5_notifier_register(mpi->mdev, &mpi->mdev_events); 3543 3544 mlx5_ib_init_cong_debugfs(ibdev, port_num); 3545 3546 key = mpi->mdev->priv.adev_idx; 3547 mlx5_core_mp_event_replay(mpi->mdev, 3548 MLX5_DRIVER_EVENT_AFFILIATION_DONE, 3549 &key); 3550 mlx5_core_mp_event_replay(ibdev->mdev, 3551 MLX5_DRIVER_EVENT_AFFILIATION_DONE, 3552 &key); 3553 3554 return true; 3555 3556 unbind: 3557 mlx5_ib_unbind_slave_port(ibdev, mpi); 3558 return false; 3559 } 3560 3561 static int mlx5_ib_data_direct_init(struct mlx5_ib_dev *dev) 3562 { 3563 char vuid[MLX5_ST_SZ_BYTES(array1024_auto) + 1] = {}; 3564 int ret; 3565 3566 if (!MLX5_CAP_GEN(dev->mdev, data_direct) || 3567 !MLX5_CAP_GEN_2(dev->mdev, query_vuid)) 3568 return 0; 3569 3570 ret = mlx5_cmd_query_vuid(dev->mdev, true, vuid); 3571 if (ret) 3572 return ret; 3573 3574 ret = mlx5_ib_create_data_direct_resources(dev); 3575 if (ret) 3576 return ret; 3577 3578 INIT_LIST_HEAD(&dev->data_direct_mr_list); 3579 ret = mlx5_data_direct_ib_reg(dev, vuid); 3580 if (ret) 3581 mlx5_ib_free_data_direct_resources(dev); 3582 3583 return ret; 3584 } 3585 3586 static void mlx5_ib_data_direct_cleanup(struct mlx5_ib_dev *dev) 3587 { 3588 if (!MLX5_CAP_GEN(dev->mdev, data_direct) || 3589 !MLX5_CAP_GEN_2(dev->mdev, query_vuid)) 3590 return; 3591 3592 mlx5_data_direct_ib_unreg(dev); 3593 mlx5_ib_free_data_direct_resources(dev); 3594 } 3595 3596 static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) 3597 { 3598 u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1; 3599 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 3600 port_num + 1); 3601 struct mlx5_ib_multiport_info *mpi; 3602 int err; 3603 u32 i; 3604 3605 if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) 3606 return 0; 3607 3608 err = mlx5_query_nic_vport_system_image_guid(dev->mdev, 3609 &dev->sys_image_guid); 3610 if (err) 3611 return err; 3612 3613 err = mlx5_nic_vport_enable_roce(dev->mdev); 3614 if (err) 3615 return err; 3616 3617 mutex_lock(&mlx5_ib_multiport_mutex); 3618 for (i = 0; i < dev->num_ports; i++) { 3619 bool bound = false; 3620 3621 /* build a stub multiport info struct for the native port. 
*/ 3622 if (i == port_num) { 3623 mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); 3624 if (!mpi) { 3625 mutex_unlock(&mlx5_ib_multiport_mutex); 3626 mlx5_nic_vport_disable_roce(dev->mdev); 3627 return -ENOMEM; 3628 } 3629 3630 mpi->is_master = true; 3631 mpi->mdev = dev->mdev; 3632 mpi->sys_image_guid = dev->sys_image_guid; 3633 dev->port[i].mp.mpi = mpi; 3634 mpi->ibdev = dev; 3635 mpi = NULL; 3636 continue; 3637 } 3638 3639 list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list, 3640 list) { 3641 if (dev->sys_image_guid == mpi->sys_image_guid && 3642 (mlx5_core_native_port_num(mpi->mdev) - 1) == i) { 3643 bound = mlx5_ib_bind_slave_port(dev, mpi); 3644 } 3645 3646 if (bound) { 3647 dev_dbg(mpi->mdev->device, 3648 "removing port from unaffiliated list.\n"); 3649 mlx5_ib_dbg(dev, "port %d bound\n", i + 1); 3650 list_del(&mpi->list); 3651 break; 3652 } 3653 } 3654 if (!bound) 3655 mlx5_ib_dbg(dev, "no free port found for port %d\n", 3656 i + 1); 3657 } 3658 3659 list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); 3660 mutex_unlock(&mlx5_ib_multiport_mutex); 3661 return err; 3662 } 3663 3664 static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) 3665 { 3666 u32 port_num = mlx5_core_native_port_num(dev->mdev) - 1; 3667 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 3668 port_num + 1); 3669 u32 i; 3670 3671 if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) 3672 return; 3673 3674 mutex_lock(&mlx5_ib_multiport_mutex); 3675 for (i = 0; i < dev->num_ports; i++) { 3676 if (dev->port[i].mp.mpi) { 3677 /* Destroy the native port stub */ 3678 if (i == port_num) { 3679 kfree(dev->port[i].mp.mpi); 3680 dev->port[i].mp.mpi = NULL; 3681 } else { 3682 mlx5_ib_dbg(dev, "unbinding port_num: %u\n", 3683 i + 1); 3684 list_add_tail(&dev->port[i].mp.mpi->list, 3685 &mlx5_ib_unaffiliated_port_list); 3686 mlx5_ib_unbind_slave_port(dev, 3687 dev->port[i].mp.mpi); 3688 } 3689 } 3690 } 3691 3692 mlx5_ib_dbg(dev, "removing from devlist\n"); 3693 list_del(&dev->ib_dev_list); 3694 mutex_unlock(&mlx5_ib_multiport_mutex); 3695 3696 mlx5_nic_vport_disable_roce(dev->mdev); 3697 } 3698 3699 static int mmap_obj_cleanup(struct ib_uobject *uobject, 3700 enum rdma_remove_reason why, 3701 struct uverbs_attr_bundle *attrs) 3702 { 3703 struct mlx5_user_mmap_entry *obj = uobject->object; 3704 3705 rdma_user_mmap_entry_remove(&obj->rdma_entry); 3706 return 0; 3707 } 3708 3709 static int mlx5_rdma_user_mmap_entry_insert(struct mlx5_ib_ucontext *c, 3710 struct mlx5_user_mmap_entry *entry, 3711 size_t length) 3712 { 3713 return rdma_user_mmap_entry_insert_range( 3714 &c->ibucontext, &entry->rdma_entry, length, 3715 (MLX5_IB_MMAP_OFFSET_START << 16), 3716 ((MLX5_IB_MMAP_OFFSET_END << 16) + (1UL << 16) - 1)); 3717 } 3718 3719 static struct mlx5_user_mmap_entry * 3720 alloc_var_entry(struct mlx5_ib_ucontext *c) 3721 { 3722 struct mlx5_user_mmap_entry *entry; 3723 struct mlx5_var_table *var_table; 3724 u32 page_idx; 3725 int err; 3726 3727 var_table = &to_mdev(c->ibucontext.device)->var_table; 3728 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 3729 if (!entry) 3730 return ERR_PTR(-ENOMEM); 3731 3732 mutex_lock(&var_table->bitmap_lock); 3733 page_idx = find_first_zero_bit(var_table->bitmap, 3734 var_table->num_var_hw_entries); 3735 if (page_idx >= var_table->num_var_hw_entries) { 3736 err = -ENOSPC; 3737 mutex_unlock(&var_table->bitmap_lock); 3738 goto end; 3739 } 3740 3741 set_bit(page_idx, var_table->bitmap); 3742 mutex_unlock(&var_table->bitmap_lock); 3743 3744 entry->address = 
var_table->hw_start_addr + 3745 (page_idx * var_table->stride_size); 3746 entry->page_idx = page_idx; 3747 entry->mmap_flag = MLX5_IB_MMAP_TYPE_VAR; 3748 3749 err = mlx5_rdma_user_mmap_entry_insert(c, entry, 3750 var_table->stride_size); 3751 if (err) 3752 goto err_insert; 3753 3754 return entry; 3755 3756 err_insert: 3757 mutex_lock(&var_table->bitmap_lock); 3758 clear_bit(page_idx, var_table->bitmap); 3759 mutex_unlock(&var_table->bitmap_lock); 3760 end: 3761 kfree(entry); 3762 return ERR_PTR(err); 3763 } 3764 3765 static int UVERBS_HANDLER(MLX5_IB_METHOD_VAR_OBJ_ALLOC)( 3766 struct uverbs_attr_bundle *attrs) 3767 { 3768 struct ib_uobject *uobj = uverbs_attr_get_uobject( 3769 attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); 3770 struct mlx5_ib_ucontext *c; 3771 struct mlx5_user_mmap_entry *entry; 3772 u64 mmap_offset; 3773 u32 length; 3774 int err; 3775 3776 c = to_mucontext(ib_uverbs_get_ucontext(attrs)); 3777 if (IS_ERR(c)) 3778 return PTR_ERR(c); 3779 3780 entry = alloc_var_entry(c); 3781 if (IS_ERR(entry)) 3782 return PTR_ERR(entry); 3783 3784 mmap_offset = mlx5_entry_to_mmap_offset(entry); 3785 length = entry->rdma_entry.npages * PAGE_SIZE; 3786 uobj->object = entry; 3787 uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE); 3788 3789 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, 3790 &mmap_offset, sizeof(mmap_offset)); 3791 if (err) 3792 return err; 3793 3794 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, 3795 &entry->page_idx, sizeof(entry->page_idx)); 3796 if (err) 3797 return err; 3798 3799 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, 3800 &length, sizeof(length)); 3801 return err; 3802 } 3803 3804 DECLARE_UVERBS_NAMED_METHOD( 3805 MLX5_IB_METHOD_VAR_OBJ_ALLOC, 3806 UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_ALLOC_HANDLE, 3807 MLX5_IB_OBJECT_VAR, 3808 UVERBS_ACCESS_NEW, 3809 UA_MANDATORY), 3810 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_PAGE_ID, 3811 UVERBS_ATTR_TYPE(u32), 3812 UA_MANDATORY), 3813 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_LENGTH, 3814 UVERBS_ATTR_TYPE(u32), 3815 UA_MANDATORY), 3816 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_VAR_OBJ_ALLOC_MMAP_OFFSET, 3817 UVERBS_ATTR_TYPE(u64), 3818 UA_MANDATORY)); 3819 3820 DECLARE_UVERBS_NAMED_METHOD_DESTROY( 3821 MLX5_IB_METHOD_VAR_OBJ_DESTROY, 3822 UVERBS_ATTR_IDR(MLX5_IB_ATTR_VAR_OBJ_DESTROY_HANDLE, 3823 MLX5_IB_OBJECT_VAR, 3824 UVERBS_ACCESS_DESTROY, 3825 UA_MANDATORY)); 3826 3827 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_VAR, 3828 UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup), 3829 &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_ALLOC), 3830 &UVERBS_METHOD(MLX5_IB_METHOD_VAR_OBJ_DESTROY)); 3831 3832 static bool var_is_supported(struct ib_device *device) 3833 { 3834 struct mlx5_ib_dev *dev = to_mdev(device); 3835 3836 return (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & 3837 MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q); 3838 } 3839 3840 static struct mlx5_user_mmap_entry * 3841 alloc_uar_entry(struct mlx5_ib_ucontext *c, 3842 enum mlx5_ib_uapi_uar_alloc_type alloc_type) 3843 { 3844 struct mlx5_user_mmap_entry *entry; 3845 struct mlx5_ib_dev *dev; 3846 u32 uar_index; 3847 int err; 3848 3849 entry = kzalloc(sizeof(*entry), GFP_KERNEL); 3850 if (!entry) 3851 return ERR_PTR(-ENOMEM); 3852 3853 dev = to_mdev(c->ibucontext.device); 3854 err = mlx5_cmd_uar_alloc(dev->mdev, &uar_index, c->devx_uid); 3855 if (err) 3856 goto end; 3857 3858 entry->page_idx = uar_index; 3859 entry->address = uar_index2paddress(dev, uar_index); 3860 if (alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) 
3861 entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_WC; 3862 else 3863 entry->mmap_flag = MLX5_IB_MMAP_TYPE_UAR_NC; 3864 3865 err = mlx5_rdma_user_mmap_entry_insert(c, entry, PAGE_SIZE); 3866 if (err) 3867 goto err_insert; 3868 3869 return entry; 3870 3871 err_insert: 3872 mlx5_cmd_uar_dealloc(dev->mdev, uar_index, c->devx_uid); 3873 end: 3874 kfree(entry); 3875 return ERR_PTR(err); 3876 } 3877 3878 static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)( 3879 struct uverbs_attr_bundle *attrs) 3880 { 3881 struct ib_uobject *uobj = uverbs_attr_get_uobject( 3882 attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE); 3883 enum mlx5_ib_uapi_uar_alloc_type alloc_type; 3884 struct mlx5_ib_ucontext *c; 3885 struct mlx5_user_mmap_entry *entry; 3886 u64 mmap_offset; 3887 u32 length; 3888 int err; 3889 3890 c = to_mucontext(ib_uverbs_get_ucontext(attrs)); 3891 if (IS_ERR(c)) 3892 return PTR_ERR(c); 3893 3894 err = uverbs_get_const(&alloc_type, attrs, 3895 MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE); 3896 if (err) 3897 return err; 3898 3899 if (alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF && 3900 alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC) 3901 return -EOPNOTSUPP; 3902 3903 if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) && 3904 alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) 3905 return -EOPNOTSUPP; 3906 3907 entry = alloc_uar_entry(c, alloc_type); 3908 if (IS_ERR(entry)) 3909 return PTR_ERR(entry); 3910 3911 mmap_offset = mlx5_entry_to_mmap_offset(entry); 3912 length = entry->rdma_entry.npages * PAGE_SIZE; 3913 uobj->object = entry; 3914 uverbs_finalize_uobj_create(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE); 3915 3916 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, 3917 &mmap_offset, sizeof(mmap_offset)); 3918 if (err) 3919 return err; 3920 3921 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, 3922 &entry->page_idx, sizeof(entry->page_idx)); 3923 if (err) 3924 return err; 3925 3926 err = uverbs_copy_to(attrs, MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, 3927 &length, sizeof(length)); 3928 return err; 3929 } 3930 3931 DECLARE_UVERBS_NAMED_METHOD( 3932 MLX5_IB_METHOD_UAR_OBJ_ALLOC, 3933 UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_ALLOC_HANDLE, 3934 MLX5_IB_OBJECT_UAR, 3935 UVERBS_ACCESS_NEW, 3936 UA_MANDATORY), 3937 UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_UAR_OBJ_ALLOC_TYPE, 3938 enum mlx5_ib_uapi_uar_alloc_type, 3939 UA_MANDATORY), 3940 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_PAGE_ID, 3941 UVERBS_ATTR_TYPE(u32), 3942 UA_MANDATORY), 3943 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_LENGTH, 3944 UVERBS_ATTR_TYPE(u32), 3945 UA_MANDATORY), 3946 UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_UAR_OBJ_ALLOC_MMAP_OFFSET, 3947 UVERBS_ATTR_TYPE(u64), 3948 UA_MANDATORY)); 3949 3950 DECLARE_UVERBS_NAMED_METHOD_DESTROY( 3951 MLX5_IB_METHOD_UAR_OBJ_DESTROY, 3952 UVERBS_ATTR_IDR(MLX5_IB_ATTR_UAR_OBJ_DESTROY_HANDLE, 3953 MLX5_IB_OBJECT_UAR, 3954 UVERBS_ACCESS_DESTROY, 3955 UA_MANDATORY)); 3956 3957 DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_UAR, 3958 UVERBS_TYPE_ALLOC_IDR(mmap_obj_cleanup), 3959 &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_ALLOC), 3960 &UVERBS_METHOD(MLX5_IB_METHOD_UAR_OBJ_DESTROY)); 3961 3962 ADD_UVERBS_ATTRIBUTES_SIMPLE( 3963 mlx5_ib_query_context, 3964 UVERBS_OBJECT_DEVICE, 3965 UVERBS_METHOD_QUERY_CONTEXT, 3966 UVERBS_ATTR_PTR_OUT( 3967 MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX, 3968 UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp, 3969 dump_fill_mkey), 3970 UA_MANDATORY)); 3971 3972 ADD_UVERBS_ATTRIBUTES_SIMPLE( 3973 mlx5_ib_reg_dmabuf_mr, 3974 UVERBS_OBJECT_MR, 3975 UVERBS_METHOD_REG_DMABUF_MR, 
3976 UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, 3977 enum mlx5_ib_uapi_reg_dmabuf_flags, 3978 UA_OPTIONAL)); 3979 3980 static const struct uapi_definition mlx5_ib_defs[] = { 3981 UAPI_DEF_CHAIN(mlx5_ib_devx_defs), 3982 UAPI_DEF_CHAIN(mlx5_ib_flow_defs), 3983 UAPI_DEF_CHAIN(mlx5_ib_qos_defs), 3984 UAPI_DEF_CHAIN(mlx5_ib_std_types_defs), 3985 UAPI_DEF_CHAIN(mlx5_ib_dm_defs), 3986 UAPI_DEF_CHAIN(mlx5_ib_create_cq_defs), 3987 3988 UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context), 3989 UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_MR, &mlx5_ib_reg_dmabuf_mr), 3990 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR, 3991 UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)), 3992 UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR), 3993 {} 3994 }; 3995 3996 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) 3997 { 3998 mlx5_ib_data_direct_cleanup(dev); 3999 mlx5_ib_cleanup_multiport_master(dev); 4000 WARN_ON(!xa_empty(&dev->odp_mkeys)); 4001 mutex_destroy(&dev->cap_mask_mutex); 4002 WARN_ON(!xa_empty(&dev->sig_mrs)); 4003 WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); 4004 mlx5r_macsec_dealloc_gids(dev); 4005 } 4006 4007 static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) 4008 { 4009 struct mlx5_core_dev *mdev = dev->mdev; 4010 int err, i; 4011 4012 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 4013 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; 4014 dev->ib_dev.dev.parent = mdev->device; 4015 dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; 4016 4017 for (i = 0; i < dev->num_ports; i++) { 4018 spin_lock_init(&dev->port[i].mp.mpi_lock); 4019 dev->port[i].roce.dev = dev; 4020 dev->port[i].roce.native_port_num = i + 1; 4021 dev->port[i].roce.last_port_state = IB_PORT_DOWN; 4022 } 4023 4024 err = mlx5r_cmd_query_special_mkeys(dev); 4025 if (err) 4026 return err; 4027 4028 err = mlx5r_macsec_init_gids_and_devlist(dev); 4029 if (err) 4030 return err; 4031 4032 err = mlx5_ib_init_multiport_master(dev); 4033 if (err) 4034 goto err; 4035 4036 err = set_has_smi_cap(dev); 4037 if (err) 4038 goto err_mp; 4039 4040 err = mlx5_query_max_pkeys(&dev->ib_dev, &dev->pkey_table_len); 4041 if (err) 4042 goto err_mp; 4043 4044 if (mlx5_use_mad_ifc(dev)) 4045 get_ext_port_caps(dev); 4046 4047 dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_max(mdev); 4048 4049 mutex_init(&dev->cap_mask_mutex); 4050 mutex_init(&dev->data_direct_lock); 4051 INIT_LIST_HEAD(&dev->qp_list); 4052 spin_lock_init(&dev->reset_flow_resource_lock); 4053 xa_init(&dev->odp_mkeys); 4054 xa_init(&dev->sig_mrs); 4055 atomic_set(&dev->mkey_var, 0); 4056 4057 spin_lock_init(&dev->dm.lock); 4058 dev->dm.dev = mdev; 4059 err = mlx5_ib_data_direct_init(dev); 4060 if (err) 4061 goto err_mp; 4062 4063 return 0; 4064 err_mp: 4065 mlx5_ib_cleanup_multiport_master(dev); 4066 err: 4067 mlx5r_macsec_dealloc_gids(dev); 4068 return err; 4069 } 4070 4071 static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent, 4072 enum rdma_nl_dev_type type, 4073 const char *name); 4074 static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev); 4075 4076 static const struct ib_device_ops mlx5_ib_dev_ops = { 4077 .owner = THIS_MODULE, 4078 .driver_id = RDMA_DRIVER_MLX5, 4079 .uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION, 4080 4081 .add_gid = mlx5_ib_add_gid, 4082 .add_sub_dev = mlx5_ib_add_sub_dev, 4083 .alloc_mr = mlx5_ib_alloc_mr, 4084 .alloc_mr_integrity = mlx5_ib_alloc_mr_integrity, 4085 .alloc_pd = mlx5_ib_alloc_pd, 4086 .alloc_ucontext = mlx5_ib_alloc_ucontext, 4087 .attach_mcast 
= mlx5_ib_mcg_attach, 4088 .check_mr_status = mlx5_ib_check_mr_status, 4089 .create_ah = mlx5_ib_create_ah, 4090 .create_cq = mlx5_ib_create_cq, 4091 .create_qp = mlx5_ib_create_qp, 4092 .create_srq = mlx5_ib_create_srq, 4093 .create_user_ah = mlx5_ib_create_ah, 4094 .dealloc_pd = mlx5_ib_dealloc_pd, 4095 .dealloc_ucontext = mlx5_ib_dealloc_ucontext, 4096 .del_gid = mlx5_ib_del_gid, 4097 .del_sub_dev = mlx5_ib_del_sub_dev, 4098 .dereg_mr = mlx5_ib_dereg_mr, 4099 .destroy_ah = mlx5_ib_destroy_ah, 4100 .destroy_cq = mlx5_ib_destroy_cq, 4101 .destroy_qp = mlx5_ib_destroy_qp, 4102 .destroy_srq = mlx5_ib_destroy_srq, 4103 .detach_mcast = mlx5_ib_mcg_detach, 4104 .disassociate_ucontext = mlx5_ib_disassociate_ucontext, 4105 .drain_rq = mlx5_ib_drain_rq, 4106 .drain_sq = mlx5_ib_drain_sq, 4107 .device_group = &mlx5_attr_group, 4108 .get_dev_fw_str = get_dev_fw_str, 4109 .get_dma_mr = mlx5_ib_get_dma_mr, 4110 .get_link_layer = mlx5_ib_port_link_layer, 4111 .map_mr_sg = mlx5_ib_map_mr_sg, 4112 .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi, 4113 .mmap = mlx5_ib_mmap, 4114 .mmap_free = mlx5_ib_mmap_free, 4115 .modify_cq = mlx5_ib_modify_cq, 4116 .modify_device = mlx5_ib_modify_device, 4117 .modify_port = mlx5_ib_modify_port, 4118 .modify_qp = mlx5_ib_modify_qp, 4119 .modify_srq = mlx5_ib_modify_srq, 4120 .poll_cq = mlx5_ib_poll_cq, 4121 .post_recv = mlx5_ib_post_recv_nodrain, 4122 .post_send = mlx5_ib_post_send_nodrain, 4123 .post_srq_recv = mlx5_ib_post_srq_recv, 4124 .process_mad = mlx5_ib_process_mad, 4125 .query_ah = mlx5_ib_query_ah, 4126 .query_device = mlx5_ib_query_device, 4127 .query_gid = mlx5_ib_query_gid, 4128 .query_pkey = mlx5_ib_query_pkey, 4129 .query_qp = mlx5_ib_query_qp, 4130 .query_srq = mlx5_ib_query_srq, 4131 .query_ucontext = mlx5_ib_query_ucontext, 4132 .reg_user_mr = mlx5_ib_reg_user_mr, 4133 .reg_user_mr_dmabuf = mlx5_ib_reg_user_mr_dmabuf, 4134 .req_notify_cq = mlx5_ib_arm_cq, 4135 .rereg_user_mr = mlx5_ib_rereg_user_mr, 4136 .resize_cq = mlx5_ib_resize_cq, 4137 4138 INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah), 4139 INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs), 4140 INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq), 4141 INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd), 4142 INIT_RDMA_OBJ_SIZE(ib_qp, mlx5_ib_qp, ibqp), 4143 INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq), 4144 INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext), 4145 }; 4146 4147 static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = { 4148 .rdma_netdev_get_params = mlx5_ib_rn_get_params, 4149 }; 4150 4151 static const struct ib_device_ops mlx5_ib_dev_sriov_ops = { 4152 .get_vf_config = mlx5_ib_get_vf_config, 4153 .get_vf_guid = mlx5_ib_get_vf_guid, 4154 .get_vf_stats = mlx5_ib_get_vf_stats, 4155 .set_vf_guid = mlx5_ib_set_vf_guid, 4156 .set_vf_link_state = mlx5_ib_set_vf_link_state, 4157 }; 4158 4159 static const struct ib_device_ops mlx5_ib_dev_mw_ops = { 4160 .alloc_mw = mlx5_ib_alloc_mw, 4161 .dealloc_mw = mlx5_ib_dealloc_mw, 4162 4163 INIT_RDMA_OBJ_SIZE(ib_mw, mlx5_ib_mw, ibmw), 4164 }; 4165 4166 static const struct ib_device_ops mlx5_ib_dev_xrc_ops = { 4167 .alloc_xrcd = mlx5_ib_alloc_xrcd, 4168 .dealloc_xrcd = mlx5_ib_dealloc_xrcd, 4169 4170 INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd), 4171 }; 4172 4173 static int mlx5_ib_init_var_table(struct mlx5_ib_dev *dev) 4174 { 4175 struct mlx5_core_dev *mdev = dev->mdev; 4176 struct mlx5_var_table *var_table = &dev->var_table; 4177 u8 log_doorbell_bar_size; 4178 u8 log_doorbell_stride; 4179 u64 bar_size; 4180 4181 log_doorbell_bar_size = 
MLX5_CAP_DEV_VDPA_EMULATION(mdev, 4182 log_doorbell_bar_size); 4183 log_doorbell_stride = MLX5_CAP_DEV_VDPA_EMULATION(mdev, 4184 log_doorbell_stride); 4185 var_table->hw_start_addr = dev->mdev->bar_addr + 4186 MLX5_CAP64_DEV_VDPA_EMULATION(mdev, 4187 doorbell_bar_offset); 4188 bar_size = (1ULL << log_doorbell_bar_size) * 4096; 4189 var_table->stride_size = 1ULL << log_doorbell_stride; 4190 var_table->num_var_hw_entries = div_u64(bar_size, 4191 var_table->stride_size); 4192 mutex_init(&var_table->bitmap_lock); 4193 var_table->bitmap = bitmap_zalloc(var_table->num_var_hw_entries, 4194 GFP_KERNEL); 4195 return (var_table->bitmap) ? 0 : -ENOMEM; 4196 } 4197 4198 static void mlx5_ib_stage_caps_cleanup(struct mlx5_ib_dev *dev) 4199 { 4200 bitmap_free(dev->var_table.bitmap); 4201 } 4202 4203 static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev) 4204 { 4205 struct mlx5_core_dev *mdev = dev->mdev; 4206 int err; 4207 4208 if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) && 4209 IS_ENABLED(CONFIG_MLX5_CORE_IPOIB)) 4210 ib_set_device_ops(&dev->ib_dev, 4211 &mlx5_ib_dev_ipoib_enhanced_ops); 4212 4213 if (mlx5_core_is_pf(mdev)) 4214 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_sriov_ops); 4215 4216 dev->umr_fence = mlx5_get_umr_fence(MLX5_CAP_GEN(mdev, umr_fence)); 4217 4218 if (MLX5_CAP_GEN(mdev, imaicl)) 4219 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_mw_ops); 4220 4221 if (MLX5_CAP_GEN(mdev, xrc)) 4222 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops); 4223 4224 if (MLX5_CAP_DEV_MEM(mdev, memic) || 4225 MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & 4226 MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) 4227 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops); 4228 4229 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops); 4230 4231 if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) 4232 dev->ib_dev.driver_def = mlx5_ib_defs; 4233 4234 err = init_node_data(dev); 4235 if (err) 4236 return err; 4237 4238 if ((MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && 4239 (MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) || 4240 MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) 4241 mutex_init(&dev->lb.mutex); 4242 4243 if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) & 4244 MLX5_GENERAL_OBJ_TYPES_CAP_VIRTIO_NET_Q) { 4245 err = mlx5_ib_init_var_table(dev); 4246 if (err) 4247 return err; 4248 } 4249 4250 dev->ib_dev.use_cq_dim = true; 4251 4252 return 0; 4253 } 4254 4255 static const struct ib_device_ops mlx5_ib_dev_port_ops = { 4256 .get_port_immutable = mlx5_port_immutable, 4257 .query_port = mlx5_ib_query_port, 4258 }; 4259 4260 static int mlx5_ib_stage_non_default_cb(struct mlx5_ib_dev *dev) 4261 { 4262 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_ops); 4263 return 0; 4264 } 4265 4266 static const struct ib_device_ops mlx5_ib_dev_port_rep_ops = { 4267 .get_port_immutable = mlx5_port_rep_immutable, 4268 .query_port = mlx5_ib_rep_query_port, 4269 .query_pkey = mlx5_ib_rep_query_pkey, 4270 }; 4271 4272 static int mlx5_ib_stage_raw_eth_non_default_cb(struct mlx5_ib_dev *dev) 4273 { 4274 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_port_rep_ops); 4275 return 0; 4276 } 4277 4278 static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = { 4279 .create_rwq_ind_table = mlx5_ib_create_rwq_ind_table, 4280 .create_wq = mlx5_ib_create_wq, 4281 .destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table, 4282 .destroy_wq = mlx5_ib_destroy_wq, 4283 .modify_wq = mlx5_ib_modify_wq, 4284 4285 INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mlx5_ib_rwq_ind_table, 4286 ib_rwq_ind_tbl), 4287 }; 4288 4289 static int 
mlx5_ib_roce_init(struct mlx5_ib_dev *dev) 4290 { 4291 struct mlx5_core_dev *mdev = dev->mdev; 4292 enum rdma_link_layer ll; 4293 int port_type_cap; 4294 u32 port_num = 0; 4295 int err; 4296 4297 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4298 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4299 4300 if (ll == IB_LINK_LAYER_ETHERNET) { 4301 ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops); 4302 4303 port_num = mlx5_core_native_port_num(dev->mdev) - 1; 4304 4305 /* Register only for native ports */ 4306 mlx5_mdev_netdev_track(dev, port_num); 4307 4308 err = mlx5_enable_eth(dev); 4309 if (err) 4310 goto cleanup; 4311 } 4312 4313 return 0; 4314 cleanup: 4315 mlx5_mdev_netdev_untrack(dev, port_num); 4316 return err; 4317 } 4318 4319 static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) 4320 { 4321 struct mlx5_core_dev *mdev = dev->mdev; 4322 enum rdma_link_layer ll; 4323 int port_type_cap; 4324 u32 port_num; 4325 4326 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4327 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4328 4329 if (ll == IB_LINK_LAYER_ETHERNET) { 4330 mlx5_disable_eth(dev); 4331 4332 port_num = mlx5_core_native_port_num(dev->mdev) - 1; 4333 mlx5_mdev_netdev_untrack(dev, port_num); 4334 } 4335 } 4336 4337 static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev) 4338 { 4339 mlx5_ib_init_cong_debugfs(dev, 4340 mlx5_core_native_port_num(dev->mdev) - 1); 4341 return 0; 4342 } 4343 4344 static void mlx5_ib_stage_cong_debugfs_cleanup(struct mlx5_ib_dev *dev) 4345 { 4346 mlx5_ib_cleanup_cong_debugfs(dev, 4347 mlx5_core_native_port_num(dev->mdev) - 1); 4348 } 4349 4350 static int mlx5_ib_stage_uar_init(struct mlx5_ib_dev *dev) 4351 { 4352 dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); 4353 return PTR_ERR_OR_ZERO(dev->mdev->priv.uar); 4354 } 4355 4356 static void mlx5_ib_stage_uar_cleanup(struct mlx5_ib_dev *dev) 4357 { 4358 mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); 4359 } 4360 4361 static int mlx5_ib_stage_bfrag_init(struct mlx5_ib_dev *dev) 4362 { 4363 int err; 4364 4365 err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); 4366 if (err) 4367 return err; 4368 4369 err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); 4370 if (err) 4371 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 4372 4373 return err; 4374 } 4375 4376 static void mlx5_ib_stage_bfrag_cleanup(struct mlx5_ib_dev *dev) 4377 { 4378 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 4379 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 4380 } 4381 4382 static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) 4383 { 4384 const char *name; 4385 4386 if (dev->sub_dev_name) { 4387 name = dev->sub_dev_name; 4388 ib_mark_name_assigned_by_user(&dev->ib_dev); 4389 } else if (!mlx5_lag_is_active(dev->mdev)) 4390 name = "mlx5_%d"; 4391 else 4392 name = "mlx5_bond_%d"; 4393 return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev); 4394 } 4395 4396 static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev) 4397 { 4398 mlx5_mkey_cache_cleanup(dev); 4399 mlx5r_umr_resource_cleanup(dev); 4400 mlx5r_umr_cleanup(dev); 4401 } 4402 4403 static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev) 4404 { 4405 ib_unregister_device(&dev->ib_dev); 4406 } 4407 4408 static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev) 4409 { 4410 int ret; 4411 4412 ret = mlx5r_umr_init(dev); 4413 if (ret) 4414 return ret; 4415 4416 ret = mlx5_mkey_cache_init(dev); 4417 if (ret) 4418 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); 4419 return ret; 4420 } 
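/*
 * Note on the stage machinery used below: each STAGE_CREATE() entry in a
 * profile pairs an init callback with its cleanup counterpart.
 * __mlx5_ib_add() walks the profile in order and invokes every non-NULL
 * init; __mlx5_ib_remove() and the error path of __mlx5_ib_add() walk it
 * backwards and invoke every non-NULL cleanup.  A stage may therefore
 * supply only one side, e.g. a hypothetical
 *
 *	STAGE_CREATE(MLX5_IB_STAGE_FOO,
 *		     mlx5_ib_stage_foo_init,
 *		     NULL),
 *
 * performs setup when the profile is loaded and needs no explicit teardown.
 */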
4421 4422 static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) 4423 { 4424 struct dentry *root; 4425 4426 if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 4427 return 0; 4428 4429 mutex_init(&dev->delay_drop.lock); 4430 dev->delay_drop.dev = dev; 4431 dev->delay_drop.activate = false; 4432 dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; 4433 INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); 4434 atomic_set(&dev->delay_drop.rqs_cnt, 0); 4435 atomic_set(&dev->delay_drop.events_cnt, 0); 4436 4437 if (!mlx5_debugfs_root) 4438 return 0; 4439 4440 root = debugfs_create_dir("delay_drop", mlx5_debugfs_get_dev_root(dev->mdev)); 4441 dev->delay_drop.dir_debugfs = root; 4442 4443 debugfs_create_atomic_t("num_timeout_events", 0400, root, 4444 &dev->delay_drop.events_cnt); 4445 debugfs_create_atomic_t("num_rqs", 0400, root, 4446 &dev->delay_drop.rqs_cnt); 4447 debugfs_create_file("timeout", 0600, root, &dev->delay_drop, 4448 &fops_delay_drop_timeout); 4449 return 0; 4450 } 4451 4452 static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) 4453 { 4454 if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) 4455 return; 4456 4457 cancel_work_sync(&dev->delay_drop.delay_drop_work); 4458 if (!dev->delay_drop.dir_debugfs) 4459 return; 4460 4461 debugfs_remove_recursive(dev->delay_drop.dir_debugfs); 4462 dev->delay_drop.dir_debugfs = NULL; 4463 } 4464 4465 static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) 4466 { 4467 dev->mdev_events.notifier_call = mlx5_ib_event; 4468 mlx5_notifier_register(dev->mdev, &dev->mdev_events); 4469 4470 mlx5r_macsec_event_register(dev); 4471 4472 return 0; 4473 } 4474 4475 static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) 4476 { 4477 mlx5r_macsec_event_unregister(dev); 4478 mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); 4479 } 4480 4481 void mlx5_ib_data_direct_bind(struct mlx5_ib_dev *ibdev, 4482 struct mlx5_data_direct_dev *dev) 4483 { 4484 mutex_lock(&ibdev->data_direct_lock); 4485 ibdev->data_direct_dev = dev; 4486 mutex_unlock(&ibdev->data_direct_lock); 4487 } 4488 4489 void mlx5_ib_data_direct_unbind(struct mlx5_ib_dev *ibdev) 4490 { 4491 mutex_lock(&ibdev->data_direct_lock); 4492 mlx5_ib_revoke_data_direct_mrs(ibdev); 4493 ibdev->data_direct_dev = NULL; 4494 mutex_unlock(&ibdev->data_direct_lock); 4495 } 4496 4497 void __mlx5_ib_remove(struct mlx5_ib_dev *dev, 4498 const struct mlx5_ib_profile *profile, 4499 int stage) 4500 { 4501 dev->ib_active = false; 4502 4503 /* Number of stages to cleanup */ 4504 while (stage) { 4505 stage--; 4506 if (profile->stage[stage].cleanup) 4507 profile->stage[stage].cleanup(dev); 4508 } 4509 4510 kfree(dev->port); 4511 ib_dealloc_device(&dev->ib_dev); 4512 } 4513 4514 int __mlx5_ib_add(struct mlx5_ib_dev *dev, 4515 const struct mlx5_ib_profile *profile) 4516 { 4517 int err; 4518 int i; 4519 4520 dev->profile = profile; 4521 4522 for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { 4523 if (profile->stage[i].init) { 4524 err = profile->stage[i].init(dev); 4525 if (err) 4526 goto err_out; 4527 } 4528 } 4529 4530 dev->ib_active = true; 4531 return 0; 4532 4533 err_out: 4534 /* Clean up stages which were initialized */ 4535 while (i) { 4536 i--; 4537 if (profile->stage[i].cleanup) 4538 profile->stage[i].cleanup(dev); 4539 } 4540 return -ENOMEM; 4541 } 4542 4543 static const struct mlx5_ib_profile pf_profile = { 4544 STAGE_CREATE(MLX5_IB_STAGE_INIT, 4545 mlx5_ib_stage_init_init, 4546 mlx5_ib_stage_init_cleanup), 
4547 STAGE_CREATE(MLX5_IB_STAGE_FS, 4548 mlx5_ib_fs_init, 4549 mlx5_ib_fs_cleanup), 4550 STAGE_CREATE(MLX5_IB_STAGE_CAPS, 4551 mlx5_ib_stage_caps_init, 4552 mlx5_ib_stage_caps_cleanup), 4553 STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, 4554 mlx5_ib_stage_non_default_cb, 4555 NULL), 4556 STAGE_CREATE(MLX5_IB_STAGE_ROCE, 4557 mlx5_ib_roce_init, 4558 mlx5_ib_roce_cleanup), 4559 STAGE_CREATE(MLX5_IB_STAGE_QP, 4560 mlx5_init_qp_table, 4561 mlx5_cleanup_qp_table), 4562 STAGE_CREATE(MLX5_IB_STAGE_SRQ, 4563 mlx5_init_srq_table, 4564 mlx5_cleanup_srq_table), 4565 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 4566 mlx5_ib_dev_res_init, 4567 mlx5_ib_dev_res_cleanup), 4568 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4569 mlx5_ib_stage_dev_notifier_init, 4570 mlx5_ib_stage_dev_notifier_cleanup), 4571 STAGE_CREATE(MLX5_IB_STAGE_ODP, 4572 mlx5_ib_odp_init_one, 4573 mlx5_ib_odp_cleanup_one), 4574 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, 4575 mlx5_ib_counters_init, 4576 mlx5_ib_counters_cleanup), 4577 STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, 4578 mlx5_ib_stage_cong_debugfs_init, 4579 mlx5_ib_stage_cong_debugfs_cleanup), 4580 STAGE_CREATE(MLX5_IB_STAGE_UAR, 4581 mlx5_ib_stage_uar_init, 4582 mlx5_ib_stage_uar_cleanup), 4583 STAGE_CREATE(MLX5_IB_STAGE_BFREG, 4584 mlx5_ib_stage_bfrag_init, 4585 mlx5_ib_stage_bfrag_cleanup), 4586 STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, 4587 NULL, 4588 mlx5_ib_stage_pre_ib_reg_umr_cleanup), 4589 STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, 4590 mlx5_ib_devx_init, 4591 mlx5_ib_devx_cleanup), 4592 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 4593 mlx5_ib_stage_ib_reg_init, 4594 mlx5_ib_stage_ib_reg_cleanup), 4595 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 4596 mlx5_ib_stage_post_ib_reg_umr_init, 4597 NULL), 4598 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, 4599 mlx5_ib_stage_delay_drop_init, 4600 mlx5_ib_stage_delay_drop_cleanup), 4601 STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, 4602 mlx5_ib_restrack_init, 4603 NULL), 4604 }; 4605 4606 const struct mlx5_ib_profile raw_eth_profile = { 4607 STAGE_CREATE(MLX5_IB_STAGE_INIT, 4608 mlx5_ib_stage_init_init, 4609 mlx5_ib_stage_init_cleanup), 4610 STAGE_CREATE(MLX5_IB_STAGE_FS, 4611 mlx5_ib_fs_init, 4612 mlx5_ib_fs_cleanup), 4613 STAGE_CREATE(MLX5_IB_STAGE_CAPS, 4614 mlx5_ib_stage_caps_init, 4615 mlx5_ib_stage_caps_cleanup), 4616 STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, 4617 mlx5_ib_stage_raw_eth_non_default_cb, 4618 NULL), 4619 STAGE_CREATE(MLX5_IB_STAGE_ROCE, 4620 mlx5_ib_roce_init, 4621 mlx5_ib_roce_cleanup), 4622 STAGE_CREATE(MLX5_IB_STAGE_QP, 4623 mlx5_init_qp_table, 4624 mlx5_cleanup_qp_table), 4625 STAGE_CREATE(MLX5_IB_STAGE_SRQ, 4626 mlx5_init_srq_table, 4627 mlx5_cleanup_srq_table), 4628 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 4629 mlx5_ib_dev_res_init, 4630 mlx5_ib_dev_res_cleanup), 4631 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER, 4632 mlx5_ib_stage_dev_notifier_init, 4633 mlx5_ib_stage_dev_notifier_cleanup), 4634 STAGE_CREATE(MLX5_IB_STAGE_COUNTERS, 4635 mlx5_ib_counters_init, 4636 mlx5_ib_counters_cleanup), 4637 STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS, 4638 mlx5_ib_stage_cong_debugfs_init, 4639 mlx5_ib_stage_cong_debugfs_cleanup), 4640 STAGE_CREATE(MLX5_IB_STAGE_UAR, 4641 mlx5_ib_stage_uar_init, 4642 mlx5_ib_stage_uar_cleanup), 4643 STAGE_CREATE(MLX5_IB_STAGE_BFREG, 4644 mlx5_ib_stage_bfrag_init, 4645 mlx5_ib_stage_bfrag_cleanup), 4646 STAGE_CREATE(MLX5_IB_STAGE_PRE_IB_REG_UMR, 4647 NULL, 4648 mlx5_ib_stage_pre_ib_reg_umr_cleanup), 4649 STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID, 4650 mlx5_ib_devx_init, 4651 mlx5_ib_devx_cleanup), 4652 
STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 4653 mlx5_ib_stage_ib_reg_init, 4654 mlx5_ib_stage_ib_reg_cleanup), 4655 STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR, 4656 mlx5_ib_stage_post_ib_reg_umr_init, 4657 NULL), 4658 STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP, 4659 mlx5_ib_stage_delay_drop_init, 4660 mlx5_ib_stage_delay_drop_cleanup), 4661 STAGE_CREATE(MLX5_IB_STAGE_RESTRACK, 4662 mlx5_ib_restrack_init, 4663 NULL), 4664 }; 4665 4666 static const struct mlx5_ib_profile plane_profile = { 4667 STAGE_CREATE(MLX5_IB_STAGE_INIT, 4668 mlx5_ib_stage_init_init, 4669 mlx5_ib_stage_init_cleanup), 4670 STAGE_CREATE(MLX5_IB_STAGE_CAPS, 4671 mlx5_ib_stage_caps_init, 4672 mlx5_ib_stage_caps_cleanup), 4673 STAGE_CREATE(MLX5_IB_STAGE_NON_DEFAULT_CB, 4674 mlx5_ib_stage_non_default_cb, 4675 NULL), 4676 STAGE_CREATE(MLX5_IB_STAGE_QP, 4677 mlx5_init_qp_table, 4678 mlx5_cleanup_qp_table), 4679 STAGE_CREATE(MLX5_IB_STAGE_SRQ, 4680 mlx5_init_srq_table, 4681 mlx5_cleanup_srq_table), 4682 STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES, 4683 mlx5_ib_dev_res_init, 4684 mlx5_ib_dev_res_cleanup), 4685 STAGE_CREATE(MLX5_IB_STAGE_BFREG, 4686 mlx5_ib_stage_bfrag_init, 4687 mlx5_ib_stage_bfrag_cleanup), 4688 STAGE_CREATE(MLX5_IB_STAGE_IB_REG, 4689 mlx5_ib_stage_ib_reg_init, 4690 mlx5_ib_stage_ib_reg_cleanup), 4691 }; 4692 4693 static struct ib_device *mlx5_ib_add_sub_dev(struct ib_device *parent, 4694 enum rdma_nl_dev_type type, 4695 const char *name) 4696 { 4697 struct mlx5_ib_dev *mparent = to_mdev(parent), *mplane; 4698 enum rdma_link_layer ll; 4699 int ret; 4700 4701 if (mparent->smi_dev) 4702 return ERR_PTR(-EEXIST); 4703 4704 ll = mlx5_port_type_cap_to_rdma_ll(MLX5_CAP_GEN(mparent->mdev, 4705 port_type)); 4706 if (type != RDMA_DEVICE_TYPE_SMI || !mparent->num_plane || 4707 ll != IB_LINK_LAYER_INFINIBAND || 4708 !MLX5_CAP_GEN_2(mparent->mdev, multiplane_qp_ud)) 4709 return ERR_PTR(-EOPNOTSUPP); 4710 4711 mplane = ib_alloc_device(mlx5_ib_dev, ib_dev); 4712 if (!mplane) 4713 return ERR_PTR(-ENOMEM); 4714 4715 mplane->port = kcalloc(mparent->num_plane * mparent->num_ports, 4716 sizeof(*mplane->port), GFP_KERNEL); 4717 if (!mplane->port) { 4718 ret = -ENOMEM; 4719 goto fail_kcalloc; 4720 } 4721 4722 mplane->ib_dev.type = type; 4723 mplane->mdev = mparent->mdev; 4724 mplane->num_ports = mparent->num_plane; 4725 mplane->sub_dev_name = name; 4726 mplane->ib_dev.phys_port_cnt = mplane->num_ports; 4727 4728 ret = __mlx5_ib_add(mplane, &plane_profile); 4729 if (ret) 4730 goto fail_ib_add; 4731 4732 mparent->smi_dev = mplane; 4733 return &mplane->ib_dev; 4734 4735 fail_ib_add: 4736 kfree(mplane->port); 4737 fail_kcalloc: 4738 ib_dealloc_device(&mplane->ib_dev); 4739 return ERR_PTR(ret); 4740 } 4741 4742 static void mlx5_ib_del_sub_dev(struct ib_device *sub_dev) 4743 { 4744 struct mlx5_ib_dev *mdev = to_mdev(sub_dev); 4745 4746 to_mdev(sub_dev->parent)->smi_dev = NULL; 4747 __mlx5_ib_remove(mdev, mdev->profile, MLX5_IB_STAGE_MAX); 4748 } 4749 4750 static int mlx5r_mp_probe(struct auxiliary_device *adev, 4751 const struct auxiliary_device_id *id) 4752 { 4753 struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); 4754 struct mlx5_core_dev *mdev = idev->mdev; 4755 struct mlx5_ib_multiport_info *mpi; 4756 struct mlx5_ib_dev *dev; 4757 bool bound = false; 4758 int err; 4759 4760 mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); 4761 if (!mpi) 4762 return -ENOMEM; 4763 4764 mpi->mdev = mdev; 4765 err = mlx5_query_nic_vport_system_image_guid(mdev, 4766 &mpi->sys_image_guid); 4767 if (err) { 4768 kfree(mpi); 4769 return err; 4770 } 4771 4772 
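	/*
	 * Try to affiliate this additional port with an already registered
	 * IB device that reports the same system image GUID; if none is
	 * found, the port is parked on the unaffiliated list so that a
	 * later arriving master device can bind it (it is expected that
	 * mlx5_ib_init_multiport_master() performs that late binding).
	 */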
mutex_lock(&mlx5_ib_multiport_mutex); 4773 list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) { 4774 if (dev->sys_image_guid == mpi->sys_image_guid) 4775 bound = mlx5_ib_bind_slave_port(dev, mpi); 4776 4777 if (bound) { 4778 rdma_roce_rescan_device(&dev->ib_dev); 4779 mpi->ibdev->ib_active = true; 4780 break; 4781 } 4782 } 4783 4784 if (!bound) { 4785 list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); 4786 dev_dbg(mdev->device, 4787 "no suitable IB device found to bind to, added to unaffiliated list.\n"); 4788 } 4789 mutex_unlock(&mlx5_ib_multiport_mutex); 4790 4791 auxiliary_set_drvdata(adev, mpi); 4792 return 0; 4793 } 4794 4795 static void mlx5r_mp_remove(struct auxiliary_device *adev) 4796 { 4797 struct mlx5_ib_multiport_info *mpi; 4798 4799 mpi = auxiliary_get_drvdata(adev); 4800 mutex_lock(&mlx5_ib_multiport_mutex); 4801 if (mpi->ibdev) 4802 mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); 4803 else 4804 list_del(&mpi->list); 4805 mutex_unlock(&mlx5_ib_multiport_mutex); 4806 kfree(mpi); 4807 } 4808 4809 static int mlx5r_probe(struct auxiliary_device *adev, 4810 const struct auxiliary_device_id *id) 4811 { 4812 struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev); 4813 struct mlx5_core_dev *mdev = idev->mdev; 4814 const struct mlx5_ib_profile *profile; 4815 int port_type_cap, num_ports, ret; 4816 enum rdma_link_layer ll; 4817 struct mlx5_ib_dev *dev; 4818 4819 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 4820 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 4821 4822 num_ports = max(MLX5_CAP_GEN(mdev, num_ports), 4823 MLX5_CAP_GEN(mdev, num_vhca_ports)); 4824 dev = ib_alloc_device(mlx5_ib_dev, ib_dev); 4825 if (!dev) 4826 return -ENOMEM; 4827 4828 if (ll == IB_LINK_LAYER_INFINIBAND) { 4829 ret = mlx5_ib_get_plane_num(mdev, &dev->num_plane); 4830 if (ret) 4831 goto fail; 4832 } 4833 4834 dev->port = kcalloc(num_ports, sizeof(*dev->port), 4835 GFP_KERNEL); 4836 if (!dev->port) { 4837 ret = -ENOMEM; 4838 goto fail; 4839 } 4840 4841 dev->mdev = mdev; 4842 dev->num_ports = num_ports; 4843 dev->ib_dev.phys_port_cnt = num_ports; 4844 4845 if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev)) 4846 profile = &raw_eth_profile; 4847 else 4848 profile = &pf_profile; 4849 4850 ret = __mlx5_ib_add(dev, profile); 4851 if (ret) 4852 goto fail_ib_add; 4853 4854 auxiliary_set_drvdata(adev, dev); 4855 return 0; 4856 4857 fail_ib_add: 4858 kfree(dev->port); 4859 fail: 4860 ib_dealloc_device(&dev->ib_dev); 4861 return ret; 4862 } 4863 4864 static void mlx5r_remove(struct auxiliary_device *adev) 4865 { 4866 struct mlx5_ib_dev *dev; 4867 4868 dev = auxiliary_get_drvdata(adev); 4869 __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); 4870 } 4871 4872 static const struct auxiliary_device_id mlx5r_mp_id_table[] = { 4873 { .name = MLX5_ADEV_NAME ".multiport", }, 4874 {}, 4875 }; 4876 4877 static const struct auxiliary_device_id mlx5r_id_table[] = { 4878 { .name = MLX5_ADEV_NAME ".rdma", }, 4879 {}, 4880 }; 4881 4882 MODULE_DEVICE_TABLE(auxiliary, mlx5r_mp_id_table); 4883 MODULE_DEVICE_TABLE(auxiliary, mlx5r_id_table); 4884 4885 static struct auxiliary_driver mlx5r_mp_driver = { 4886 .name = "multiport", 4887 .probe = mlx5r_mp_probe, 4888 .remove = mlx5r_mp_remove, 4889 .id_table = mlx5r_mp_id_table, 4890 }; 4891 4892 static struct auxiliary_driver mlx5r_driver = { 4893 .name = "rdma", 4894 .probe = mlx5r_probe, 4895 .remove = mlx5r_remove, 4896 .id_table = mlx5r_id_table, 4897 }; 4898 4899 static int __init mlx5_ib_init(void) 4900 { 4901 int ret; 4902 4903 
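	/*
	 * Bring-up order: the emergency XLT page and the ordered event
	 * workqueue come first, followed by QP event handling, ODP, the
	 * representor and data-direct helpers, and finally the two
	 * auxiliary drivers that actually bind devices.  The error labels
	 * below unwind in exactly the reverse order; mlx5_ib_cleanup()
	 * releases the same resources on module unload.
	 */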
xlt_emergency_page = (void *)__get_free_page(GFP_KERNEL); 4904 if (!xlt_emergency_page) 4905 return -ENOMEM; 4906 4907 mlx5_ib_event_wq = alloc_ordered_workqueue("mlx5_ib_event_wq", 0); 4908 if (!mlx5_ib_event_wq) { 4909 free_page((unsigned long)xlt_emergency_page); 4910 return -ENOMEM; 4911 } 4912 4913 ret = mlx5_ib_qp_event_init(); 4914 if (ret) 4915 goto qp_event_err; 4916 4917 mlx5_ib_odp_init(); 4918 ret = mlx5r_rep_init(); 4919 if (ret) 4920 goto rep_err; 4921 ret = mlx5_data_direct_driver_register(); 4922 if (ret) 4923 goto dd_err; 4924 ret = auxiliary_driver_register(&mlx5r_mp_driver); 4925 if (ret) 4926 goto mp_err; 4927 ret = auxiliary_driver_register(&mlx5r_driver); 4928 if (ret) 4929 goto drv_err; 4930 4931 return 0; 4932 4933 drv_err: 4934 auxiliary_driver_unregister(&mlx5r_mp_driver); 4935 mp_err: 4936 mlx5_data_direct_driver_unregister(); 4937 dd_err: 4938 mlx5r_rep_cleanup(); 4939 rep_err: 4940 mlx5_ib_qp_event_cleanup(); 4941 qp_event_err: 4942 destroy_workqueue(mlx5_ib_event_wq); 4943 free_page((unsigned long)xlt_emergency_page); 4944 return ret; 4945 } 4946 4947 static void __exit mlx5_ib_cleanup(void) 4948 { 4949 mlx5_data_direct_driver_unregister(); 4950 auxiliary_driver_unregister(&mlx5r_driver); 4951 auxiliary_driver_unregister(&mlx5r_mp_driver); 4952 mlx5r_rep_cleanup(); 4953 4954 mlx5_ib_qp_event_cleanup(); 4955 destroy_workqueue(mlx5_ib_event_wq); 4956 free_page((unsigned long)xlt_emergency_page); 4957 } 4958 4959 module_init(mlx5_ib_init); 4960 module_exit(mlx5_ib_cleanup); 4961
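/*
 * Informational note (based on the auxiliary bus naming convention rather
 * than on this file alone): the id tables above match auxiliary devices
 * created by the mlx5 core driver, i.e. MLX5_ADEV_NAME ".rdma" for the
 * regular IB/RoCE personality and MLX5_ADEV_NAME ".multiport" for the
 * extra ports of a multi-port HCA.
 */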