/*-
 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/vport.h>
#include <asm/pgtable.h>
#include <linux/fs.h>
#undef inode

#include <rdma/ib_user_verbs.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include "user.h"
#include "mlx5_ib.h"

#include <sys/unistd.h>
#include <sys/kthread.h>

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "3.2-rc1"
#define DRIVER_RELDATE "May 2016"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
MODULE_VERSION(mlx5ib, 1);

static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
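
/*
 * Atomic capability reporting: get_atomic_caps() below advertises
 * IB_ATOMIC_HCA only when the HCA supports both compare-and-swap and
 * fetch-and-add on standard 8-byte operands and the requester
 * endianness requirement is met (the device supports the 8-byte
 * endianness mode or the host is big-endian); otherwise IB_ATOMIC_NONE
 * is reported.
 */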

enum {
	MLX5_STANDARD_ATOMIC_SIZE = 0x8,
};

struct workqueue_struct *mlx5_ib_wq;

static char mlx5_version[] =
	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

static void get_atomic_caps(struct mlx5_ib_dev *dev,
			    struct ib_device_attr *props)
{
	int tmp;
	u8 atomic_operations;
	u8 atomic_size_qp;
	u8 atomic_req_endianess;

	atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
	atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
	atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
					       atomic_req_8B_endianess_mode) ||
			       !mlx5_host_is_le();

	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
	if (((atomic_operations & tmp) == tmp) &&
	    (atomic_size_qp & 8)) {
		if (atomic_req_endianess)
			props->atomic_cap = IB_ATOMIC_HCA;
		else
			props->atomic_cap = IB_ATOMIC_NONE;
	} else {
		props->atomic_cap = IB_ATOMIC_NONE;
	}

	tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
	if (((atomic_operations & tmp) == tmp) &&
	    (atomic_size_qp & 8)) {
		if (atomic_req_endianess)
			props->masked_atomic_cap = IB_ATOMIC_HCA;
		else
			props->masked_atomic_cap = IB_ATOMIC_NONE;
	} else {
		props->masked_atomic_cap = IB_ATOMIC_NONE;
	}
}

static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(device);

	switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
	case MLX5_CAP_PORT_TYPE_IB:
		return IB_LINK_LAYER_INFINIBAND;
	case MLX5_CAP_PORT_TYPE_ETH:
		return IB_LINK_LAYER_ETHERNET;
	default:
		return IB_LINK_LAYER_UNSPECIFIED;
	}
}

static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
	return !dev->mdev->issi;
}

enum {
	MLX5_VPORT_ACCESS_METHOD_MAD,
	MLX5_VPORT_ACCESS_METHOD_HCA,
	MLX5_VPORT_ACCESS_METHOD_NIC,
};

static int mlx5_get_vport_access_method(struct ib_device *ibdev)
{
	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
		return MLX5_VPORT_ACCESS_METHOD_MAD;

	if (mlx5_ib_port_link_layer(ibdev, 1) ==
	    IB_LINK_LAYER_ETHERNET)
		return MLX5_VPORT_ACCESS_METHOD_NIC;

	return MLX5_VPORT_ACCESS_METHOD_HCA;
}

static int mlx5_query_system_image_guid(struct ib_device *ibdev,
					__be64 *sys_image_guid)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u64 tmp;
	int err;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_system_image_guid_mad_ifc(ibdev,
							    sys_image_guid);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
		if (!err)
			*sys_image_guid = cpu_to_be64(tmp);
		return err;

	case MLX5_VPORT_ACCESS_METHOD_NIC:
		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
		if (!err)
			*sys_image_guid = cpu_to_be64(tmp);
		return err;

	default:
		return -EINVAL;
	}
}

static int mlx5_query_max_pkeys(struct ib_device *ibdev,
				u16 *max_pkeys)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;

	switch (mlx5_get_vport_access_method(ibdev)) {
	case MLX5_VPORT_ACCESS_METHOD_MAD:
		return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);

	case MLX5_VPORT_ACCESS_METHOD_HCA:
	case
MLX5_VPORT_ACCESS_METHOD_NIC: 197 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, 198 pkey_table_size)); 199 return 0; 200 201 default: 202 return -EINVAL; 203 } 204 } 205 206 static int mlx5_query_vendor_id(struct ib_device *ibdev, 207 u32 *vendor_id) 208 { 209 struct mlx5_ib_dev *dev = to_mdev(ibdev); 210 211 switch (mlx5_get_vport_access_method(ibdev)) { 212 case MLX5_VPORT_ACCESS_METHOD_MAD: 213 return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id); 214 215 case MLX5_VPORT_ACCESS_METHOD_HCA: 216 case MLX5_VPORT_ACCESS_METHOD_NIC: 217 return mlx5_core_query_vendor_id(dev->mdev, vendor_id); 218 219 default: 220 return -EINVAL; 221 } 222 } 223 224 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, 225 __be64 *node_guid) 226 { 227 u64 tmp; 228 int err; 229 230 switch (mlx5_get_vport_access_method(&dev->ib_dev)) { 231 case MLX5_VPORT_ACCESS_METHOD_MAD: 232 return mlx5_query_node_guid_mad_ifc(dev, node_guid); 233 234 case MLX5_VPORT_ACCESS_METHOD_HCA: 235 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); 236 if (!err) 237 *node_guid = cpu_to_be64(tmp); 238 return err; 239 240 case MLX5_VPORT_ACCESS_METHOD_NIC: 241 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); 242 if (!err) 243 *node_guid = cpu_to_be64(tmp); 244 return err; 245 246 default: 247 return -EINVAL; 248 } 249 } 250 251 struct mlx5_reg_node_desc { 252 u8 desc[64]; 253 }; 254 255 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) 256 { 257 struct mlx5_reg_node_desc in; 258 259 if (mlx5_use_mad_ifc(dev)) 260 return mlx5_query_node_desc_mad_ifc(dev, node_desc); 261 262 memset(&in, 0, sizeof(in)); 263 264 return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc, 265 sizeof(struct mlx5_reg_node_desc), 266 MLX5_REG_NODE_DESC, 0, 0); 267 } 268 269 static int mlx5_ib_query_device(struct ib_device *ibdev, 270 struct ib_device_attr *props) 271 { 272 struct mlx5_ib_dev *dev = to_mdev(ibdev); 273 struct mlx5_core_dev *mdev = dev->mdev; 274 int max_sq_desc; 275 int max_rq_sg; 276 int max_sq_sg; 277 int err; 278 279 280 memset(props, 0, sizeof(*props)); 281 282 err = mlx5_query_system_image_guid(ibdev, 283 &props->sys_image_guid); 284 if (err) 285 return err; 286 287 err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); 288 if (err) 289 return err; 290 291 err = mlx5_query_vendor_id(ibdev, &props->vendor_id); 292 if (err) 293 return err; 294 295 props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | 296 ((u64)fw_rev_min(dev->mdev) << 16) | 297 fw_rev_sub(dev->mdev); 298 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | 299 IB_DEVICE_PORT_ACTIVE_EVENT | 300 IB_DEVICE_SYS_IMAGE_GUID | 301 IB_DEVICE_RC_RNR_NAK_GEN; 302 303 if (MLX5_CAP_GEN(mdev, pkv)) 304 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; 305 if (MLX5_CAP_GEN(mdev, qkv)) 306 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; 307 if (MLX5_CAP_GEN(mdev, apm)) 308 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; 309 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; 310 if (MLX5_CAP_GEN(mdev, xrc)) 311 props->device_cap_flags |= IB_DEVICE_XRC; 312 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 313 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 314 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 315 316 props->vendor_part_id = mdev->pdev->device; 317 props->hw_ver = mdev->pdev->revision; 318 319 props->max_mr_size = ~0ull; 320 props->page_size_cap = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1); 321 props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); 322 props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, 
log_max_qp_sz); 323 max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / 324 sizeof(struct mlx5_wqe_data_seg); 325 max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); 326 max_sq_sg = (max_sq_desc - 327 sizeof(struct mlx5_wqe_ctrl_seg) - 328 sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg); 329 props->max_sge = min(max_rq_sg, max_sq_sg); 330 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); 331 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; 332 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 333 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); 334 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); 335 props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); 336 props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); 337 props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; 338 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); 339 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 340 props->max_srq_sge = max_rq_sg - 1; 341 props->max_fast_reg_page_list_len = (unsigned int)-1; 342 get_atomic_caps(dev, props); 343 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); 344 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); 345 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 346 props->max_mcast_grp; 347 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ 348 props->max_ah = INT_MAX; 349 350 return 0; 351 } 352 353 enum mlx5_ib_width { 354 MLX5_IB_WIDTH_1X = 1 << 0, 355 MLX5_IB_WIDTH_2X = 1 << 1, 356 MLX5_IB_WIDTH_4X = 1 << 2, 357 MLX5_IB_WIDTH_8X = 1 << 3, 358 MLX5_IB_WIDTH_12X = 1 << 4 359 }; 360 361 static int translate_active_width(struct ib_device *ibdev, u8 active_width, 362 u8 *ib_width) 363 { 364 struct mlx5_ib_dev *dev = to_mdev(ibdev); 365 int err = 0; 366 367 if (active_width & MLX5_IB_WIDTH_1X) { 368 *ib_width = IB_WIDTH_1X; 369 } else if (active_width & MLX5_IB_WIDTH_2X) { 370 mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n", 371 (int)active_width); 372 err = -EINVAL; 373 } else if (active_width & MLX5_IB_WIDTH_4X) { 374 *ib_width = IB_WIDTH_4X; 375 } else if (active_width & MLX5_IB_WIDTH_8X) { 376 *ib_width = IB_WIDTH_8X; 377 } else if (active_width & MLX5_IB_WIDTH_12X) { 378 *ib_width = IB_WIDTH_12X; 379 } else { 380 mlx5_ib_dbg(dev, "Invalid active_width %d\n", 381 (int)active_width); 382 err = -EINVAL; 383 } 384 385 return err; 386 } 387 388 /* 389 * TODO: Move to IB core 390 */ 391 enum ib_max_vl_num { 392 __IB_MAX_VL_0 = 1, 393 __IB_MAX_VL_0_1 = 2, 394 __IB_MAX_VL_0_3 = 3, 395 __IB_MAX_VL_0_7 = 4, 396 __IB_MAX_VL_0_14 = 5, 397 }; 398 399 enum mlx5_vl_hw_cap { 400 MLX5_VL_HW_0 = 1, 401 MLX5_VL_HW_0_1 = 2, 402 MLX5_VL_HW_0_2 = 3, 403 MLX5_VL_HW_0_3 = 4, 404 MLX5_VL_HW_0_4 = 5, 405 MLX5_VL_HW_0_5 = 6, 406 MLX5_VL_HW_0_6 = 7, 407 MLX5_VL_HW_0_7 = 8, 408 MLX5_VL_HW_0_14 = 15 409 }; 410 411 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, 412 u8 *max_vl_num) 413 { 414 switch (vl_hw_cap) { 415 case MLX5_VL_HW_0: 416 *max_vl_num = __IB_MAX_VL_0; 417 break; 418 case MLX5_VL_HW_0_1: 419 *max_vl_num = __IB_MAX_VL_0_1; 420 break; 421 case MLX5_VL_HW_0_3: 422 *max_vl_num = __IB_MAX_VL_0_3; 423 break; 424 case MLX5_VL_HW_0_7: 425 *max_vl_num = __IB_MAX_VL_0_7; 426 break; 427 case MLX5_VL_HW_0_14: 428 *max_vl_num = __IB_MAX_VL_0_14; 429 break; 430 431 default: 432 return -EINVAL; 433 } 434 435 return 0; 436 } 437 438 static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port, 
439 struct ib_port_attr *props) 440 { 441 struct mlx5_ib_dev *dev = to_mdev(ibdev); 442 struct mlx5_core_dev *mdev = dev->mdev; 443 u32 *rep; 444 int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); 445 struct mlx5_ptys_reg *ptys; 446 struct mlx5_pmtu_reg *pmtu; 447 struct mlx5_pvlc_reg pvlc; 448 void *ctx; 449 int err; 450 451 rep = mlx5_vzalloc(outlen); 452 ptys = kzalloc(sizeof(*ptys), GFP_KERNEL); 453 pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL); 454 if (!rep || !ptys || !pmtu) { 455 err = -ENOMEM; 456 goto out; 457 } 458 459 memset(props, 0, sizeof(*props)); 460 461 /* what if I am pf with dual port */ 462 err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen); 463 if (err) 464 goto out; 465 466 ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context); 467 468 props->lid = MLX5_GET(hca_vport_context, ctx, lid); 469 props->lmc = MLX5_GET(hca_vport_context, ctx, lmc); 470 props->sm_lid = MLX5_GET(hca_vport_context, ctx, sm_lid); 471 props->sm_sl = MLX5_GET(hca_vport_context, ctx, sm_sl); 472 props->state = MLX5_GET(hca_vport_context, ctx, vport_state); 473 props->phys_state = MLX5_GET(hca_vport_context, ctx, 474 port_physical_state); 475 props->port_cap_flags = MLX5_GET(hca_vport_context, ctx, cap_mask1); 476 props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); 477 props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); 478 props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); 479 props->bad_pkey_cntr = MLX5_GET(hca_vport_context, ctx, 480 pkey_violation_counter); 481 props->qkey_viol_cntr = MLX5_GET(hca_vport_context, ctx, 482 qkey_violation_counter); 483 props->subnet_timeout = MLX5_GET(hca_vport_context, ctx, 484 subnet_timeout); 485 props->init_type_reply = MLX5_GET(hca_vport_context, ctx, 486 init_type_reply); 487 488 ptys->proto_mask |= MLX5_PTYS_IB; 489 ptys->local_port = port; 490 err = mlx5_core_access_ptys(mdev, ptys, 0); 491 if (err) 492 goto out; 493 494 err = translate_active_width(ibdev, ptys->ib_link_width_oper, 495 &props->active_width); 496 if (err) 497 goto out; 498 499 props->active_speed = (u8)ptys->ib_proto_oper; 500 501 pmtu->local_port = port; 502 err = mlx5_core_access_pmtu(mdev, pmtu, 0); 503 if (err) 504 goto out; 505 506 props->max_mtu = pmtu->max_mtu; 507 props->active_mtu = pmtu->oper_mtu; 508 509 memset(&pvlc, 0, sizeof(pvlc)); 510 pvlc.local_port = port; 511 err = mlx5_core_access_pvlc(mdev, &pvlc, 0); 512 if (err) 513 goto out; 514 515 err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap, 516 &props->max_vl_num); 517 out: 518 kvfree(rep); 519 kfree(ptys); 520 kfree(pmtu); 521 return err; 522 } 523 524 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, 525 struct ib_port_attr *props) 526 { 527 switch (mlx5_get_vport_access_method(ibdev)) { 528 case MLX5_VPORT_ACCESS_METHOD_MAD: 529 return mlx5_query_port_mad_ifc(ibdev, port, props); 530 531 case MLX5_VPORT_ACCESS_METHOD_HCA: 532 return mlx5_query_port_ib(ibdev, port, props); 533 534 case MLX5_VPORT_ACCESS_METHOD_NIC: 535 return mlx5_query_port_roce(ibdev, port, props); 536 537 default: 538 return -EINVAL; 539 } 540 } 541 542 static inline int 543 mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev) 544 { 545 if (dev->if_addrlen != ETH_ALEN) 546 return -1; 547 memcpy(eui, IF_LLADDR(dev), 3); 548 memcpy(eui + 5, IF_LLADDR(dev) + 3, 3); 549 550 /* NOTE: The scope ID is added by the GID to IP conversion */ 551 552 eui[3] = 0xFF; 553 eui[4] = 0xFE; 554 eui[0] ^= 2; 555 return 0; 556 } 557 558 static void 559 
mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid) 560 { 561 gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); 562 mlx5_addrconf_ifid_eui48(&gid->raw[8], dev); 563 } 564 565 static inline int 566 mlx5_ip2gid(const struct sockaddr *addr, union ib_gid *gid) 567 { 568 switch (addr->sa_family) { 569 case AF_INET: 570 ipv6_addr_set_v4mapped(((const struct sockaddr_in *)addr)->sin_addr.s_addr, 571 (struct in6_addr *)gid->raw); 572 break; 573 case AF_INET6: 574 memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16); 575 /* clear SCOPE ID */ 576 gid->raw[2] = 0; 577 gid->raw[3] = 0; 578 break; 579 default: 580 return -EINVAL; 581 } 582 return 0; 583 } 584 585 static void 586 mlx5_ib_roce_port_update(void *arg) 587 { 588 struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg; 589 struct mlx5_ib_dev *dev = port->dev; 590 struct mlx5_core_dev *mdev = dev->mdev; 591 struct net_device *xdev[MLX5_IB_GID_MAX]; 592 struct net_device *idev; 593 struct net_device *ndev; 594 struct ifaddr *ifa; 595 union ib_gid gid_temp; 596 597 while (port->port_gone == 0) { 598 int update = 0; 599 int gid_index = 0; 600 int j; 601 int error; 602 603 ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH); 604 if (ndev == NULL) { 605 pause("W", hz); 606 continue; 607 } 608 609 CURVNET_SET_QUIET(ndev->if_vnet); 610 611 memset(&gid_temp, 0, sizeof(gid_temp)); 612 mlx5_make_default_gid(ndev, &gid_temp); 613 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) { 614 port->gid_table[gid_index] = gid_temp; 615 update = 1; 616 } 617 xdev[gid_index] = ndev; 618 gid_index++; 619 620 IFNET_RLOCK(); 621 TAILQ_FOREACH(idev, &V_ifnet, if_link) { 622 if (idev == ndev) 623 break; 624 } 625 if (idev != NULL) { 626 TAILQ_FOREACH(idev, &V_ifnet, if_link) { 627 if (idev != ndev) { 628 if (idev->if_type != IFT_L2VLAN) 629 continue; 630 if (ndev != rdma_vlan_dev_real_dev(idev)) 631 continue; 632 } 633 /* clone address information for IPv4 and IPv6 */ 634 IF_ADDR_RLOCK(idev); 635 TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) { 636 if (ifa->ifa_addr == NULL || 637 (ifa->ifa_addr->sa_family != AF_INET && 638 ifa->ifa_addr->sa_family != AF_INET6) || 639 gid_index >= MLX5_IB_GID_MAX) 640 continue; 641 memset(&gid_temp, 0, sizeof(gid_temp)); 642 mlx5_ip2gid(ifa->ifa_addr, &gid_temp); 643 /* check for existing entry */ 644 for (j = 0; j != gid_index; j++) { 645 if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0) 646 break; 647 } 648 /* check if new entry must be added */ 649 if (j == gid_index) { 650 if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) { 651 port->gid_table[gid_index] = gid_temp; 652 update = 1; 653 } 654 xdev[gid_index] = idev; 655 gid_index++; 656 } 657 } 658 IF_ADDR_RUNLOCK(idev); 659 } 660 } 661 IFNET_RUNLOCK(); 662 CURVNET_RESTORE(); 663 664 if (update != 0 && 665 mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) { 666 struct ib_event event = { 667 .device = &dev->ib_dev, 668 .element.port_num = port->port_num + 1, 669 .event = IB_EVENT_GID_CHANGE, 670 }; 671 672 /* add new entries, if any */ 673 for (j = 0; j != gid_index; j++) { 674 error = modify_gid_roce(&dev->ib_dev, port->port_num, j, 675 port->gid_table + j, xdev[j]); 676 if (error != 0) 677 printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error); 678 } 679 memset(&gid_temp, 0, sizeof(gid_temp)); 680 681 /* clear old entries, if any */ 682 for (; j != MLX5_IB_GID_MAX; j++) { 683 if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0) 684 
continue; 685 port->gid_table[j] = gid_temp; 686 (void) modify_gid_roce(&dev->ib_dev, port->port_num, j, 687 port->gid_table + j, ndev); 688 } 689 690 /* make sure ibcore gets updated */ 691 ib_dispatch_event(&event); 692 } 693 pause("W", hz); 694 } 695 do { 696 struct ib_event event = { 697 .device = &dev->ib_dev, 698 .element.port_num = port->port_num + 1, 699 .event = IB_EVENT_GID_CHANGE, 700 }; 701 /* make sure ibcore gets updated */ 702 ib_dispatch_event(&event); 703 704 /* wait a bit */ 705 pause("W", hz); 706 } while (0); 707 port->port_gone = 2; 708 kthread_exit(); 709 } 710 711 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 712 union ib_gid *gid) 713 { 714 struct mlx5_ib_dev *dev = to_mdev(ibdev); 715 struct mlx5_core_dev *mdev = dev->mdev; 716 717 switch (mlx5_get_vport_access_method(ibdev)) { 718 case MLX5_VPORT_ACCESS_METHOD_MAD: 719 return mlx5_query_gids_mad_ifc(ibdev, port, index, gid); 720 721 case MLX5_VPORT_ACCESS_METHOD_HCA: 722 return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid); 723 724 case MLX5_VPORT_ACCESS_METHOD_NIC: 725 if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) || 726 index < 0 || index >= MLX5_IB_GID_MAX || 727 dev->port[port - 1].port_gone != 0) 728 memset(gid, 0, sizeof(*gid)); 729 else 730 *gid = dev->port[port - 1].gid_table[index]; 731 return 0; 732 733 default: 734 return -EINVAL; 735 } 736 } 737 738 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 739 u16 *pkey) 740 { 741 struct mlx5_ib_dev *dev = to_mdev(ibdev); 742 struct mlx5_core_dev *mdev = dev->mdev; 743 744 switch (mlx5_get_vport_access_method(ibdev)) { 745 case MLX5_VPORT_ACCESS_METHOD_MAD: 746 return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey); 747 748 case MLX5_VPORT_ACCESS_METHOD_HCA: 749 case MLX5_VPORT_ACCESS_METHOD_NIC: 750 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index, 751 pkey); 752 753 default: 754 return -EINVAL; 755 } 756 } 757 758 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, 759 struct ib_device_modify *props) 760 { 761 struct mlx5_ib_dev *dev = to_mdev(ibdev); 762 struct mlx5_reg_node_desc in; 763 struct mlx5_reg_node_desc out; 764 int err; 765 766 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) 767 return -EOPNOTSUPP; 768 769 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 770 return 0; 771 772 /* 773 * If possible, pass node desc to FW, so it can generate 774 * a 144 trap. If cmd fails, just ignore. 775 */ 776 memcpy(&in, props->node_desc, 64); 777 err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, 778 sizeof(out), MLX5_REG_NODE_DESC, 0, 1); 779 if (err) 780 return err; 781 782 memcpy(ibdev->node_desc, props->node_desc, 64); 783 784 return err; 785 } 786 787 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, 788 struct ib_port_modify *props) 789 { 790 u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) == 791 IB_LINK_LAYER_ETHERNET); 792 struct mlx5_ib_dev *dev = to_mdev(ibdev); 793 struct ib_port_attr attr; 794 u32 tmp; 795 int err; 796 797 /* return OK if this is RoCE. CM calls ib_modify_port() regardless 798 * of whether port link layer is ETH or IB. For ETH ports, qkey 799 * violations and port capabilities are not valid. 
800 */ 801 if (is_eth) 802 return 0; 803 804 mutex_lock(&dev->cap_mask_mutex); 805 806 err = mlx5_ib_query_port(ibdev, port, &attr); 807 if (err) 808 goto out; 809 810 tmp = (attr.port_cap_flags | props->set_port_cap_mask) & 811 ~props->clr_port_cap_mask; 812 813 err = mlx5_set_port_caps(dev->mdev, port, tmp); 814 815 out: 816 mutex_unlock(&dev->cap_mask_mutex); 817 return err; 818 } 819 820 enum mlx5_cap_flags { 821 MLX5_CAP_COMPACT_AV = 1 << 0, 822 }; 823 824 static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev) 825 { 826 *flags |= MLX5_CAP_GEN(dev, compact_address_vector) ? 827 MLX5_CAP_COMPACT_AV : 0; 828 } 829 830 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, 831 struct ib_udata *udata) 832 { 833 struct mlx5_ib_dev *dev = to_mdev(ibdev); 834 struct mlx5_ib_alloc_ucontext_req_v2 req; 835 struct mlx5_ib_alloc_ucontext_resp resp; 836 struct mlx5_ib_ucontext *context; 837 struct mlx5_uuar_info *uuari; 838 struct mlx5_uar *uars; 839 int gross_uuars; 840 int num_uars; 841 int ver; 842 int uuarn; 843 int err; 844 int i; 845 size_t reqlen; 846 847 if (!dev->ib_active) 848 return ERR_PTR(-EAGAIN); 849 850 memset(&req, 0, sizeof(req)); 851 memset(&resp, 0, sizeof(resp)); 852 853 reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); 854 if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) 855 ver = 0; 856 else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2)) 857 ver = 2; 858 else { 859 mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen); 860 return ERR_PTR(-EINVAL); 861 } 862 863 err = ib_copy_from_udata(&req, udata, reqlen); 864 if (err) { 865 mlx5_ib_err(dev, "copy failed\n"); 866 return ERR_PTR(err); 867 } 868 869 if (req.reserved) { 870 mlx5_ib_err(dev, "request corrupted\n"); 871 return ERR_PTR(-EINVAL); 872 } 873 874 if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) { 875 mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars); 876 return ERR_PTR(-ENOMEM); 877 } 878 879 req.total_num_uuars = ALIGN(req.total_num_uuars, 880 MLX5_NON_FP_BF_REGS_PER_PAGE); 881 if (req.num_low_latency_uuars > req.total_num_uuars - 1) { 882 mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n", 883 req.total_num_uuars, req.total_num_uuars); 884 return ERR_PTR(-EINVAL); 885 } 886 887 num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE; 888 gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE; 889 resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); 890 if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) 891 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); 892 resp.cache_line_size = L1_CACHE_BYTES; 893 resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); 894 resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); 895 resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 896 resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 897 resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 898 set_mlx5_flags(&resp.flags, dev->mdev); 899 900 if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen) 901 resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc); 902 903 if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen) 904 resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc); 905 906 context = kzalloc(sizeof(*context), GFP_KERNEL); 907 if (!context) 908 return ERR_PTR(-ENOMEM); 909 910 uuari = &context->uuari; 911 
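	/*
	 * Set up the UAR bookkeeping for this context: the bitmap below
	 * reserves the fast-path blue-flame registers (indices 2 and 3
	 * within each group of four) so only the regular uuars are handed
	 * out, and uuari->count tracks per-uuar usage.
	 */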
mutex_init(&uuari->lock); 912 uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL); 913 if (!uars) { 914 err = -ENOMEM; 915 goto out_ctx; 916 } 917 918 uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars), 919 sizeof(*uuari->bitmap), 920 GFP_KERNEL); 921 if (!uuari->bitmap) { 922 err = -ENOMEM; 923 goto out_uar_ctx; 924 } 925 /* 926 * clear all fast path uuars 927 */ 928 for (i = 0; i < gross_uuars; i++) { 929 uuarn = i & 3; 930 if (uuarn == 2 || uuarn == 3) 931 set_bit(i, uuari->bitmap); 932 } 933 934 uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL); 935 if (!uuari->count) { 936 err = -ENOMEM; 937 goto out_bitmap; 938 } 939 940 for (i = 0; i < num_uars; i++) { 941 err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index); 942 if (err) { 943 mlx5_ib_err(dev, "uar alloc failed at %d\n", i); 944 goto out_uars; 945 } 946 } 947 for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) 948 context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX; 949 950 INIT_LIST_HEAD(&context->db_page_list); 951 mutex_init(&context->db_page_mutex); 952 953 resp.tot_uuars = req.total_num_uuars; 954 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); 955 err = ib_copy_to_udata(udata, &resp, 956 min_t(size_t, udata->outlen, sizeof(resp))); 957 if (err) 958 goto out_uars; 959 960 uuari->ver = ver; 961 uuari->num_low_latency_uuars = req.num_low_latency_uuars; 962 uuari->uars = uars; 963 uuari->num_uars = num_uars; 964 965 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == 966 IB_LINK_LAYER_ETHERNET) { 967 err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn); 968 if (err) 969 goto out_uars; 970 } 971 972 return &context->ibucontext; 973 974 out_uars: 975 for (i--; i >= 0; i--) 976 mlx5_cmd_free_uar(dev->mdev, uars[i].index); 977 kfree(uuari->count); 978 979 out_bitmap: 980 kfree(uuari->bitmap); 981 982 out_uar_ctx: 983 kfree(uars); 984 985 out_ctx: 986 kfree(context); 987 return ERR_PTR(err); 988 } 989 990 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) 991 { 992 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 993 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 994 struct mlx5_uuar_info *uuari = &context->uuari; 995 int i; 996 997 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == 998 IB_LINK_LAYER_ETHERNET) 999 mlx5_dealloc_transport_domain(dev->mdev, context->tdn); 1000 1001 for (i = 0; i < uuari->num_uars; i++) { 1002 if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index)) 1003 mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index); 1004 } 1005 for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) { 1006 if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX) 1007 mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]); 1008 } 1009 1010 kfree(uuari->count); 1011 kfree(uuari->bitmap); 1012 kfree(uuari->uars); 1013 kfree(context); 1014 1015 return 0; 1016 } 1017 1018 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index) 1019 { 1020 return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index; 1021 } 1022 1023 static int get_command(unsigned long offset) 1024 { 1025 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; 1026 } 1027 1028 static int get_arg(unsigned long offset) 1029 { 1030 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); 1031 } 1032 1033 static int get_index(unsigned long offset) 1034 { 1035 return get_arg(offset); 1036 } 1037 1038 static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc, 1039 struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev, 1040 struct 
mlx5_ib_ucontext *context) 1041 { 1042 unsigned long idx; 1043 phys_addr_t pfn; 1044 1045 if (vma->vm_end - vma->vm_start != PAGE_SIZE) { 1046 mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n", 1047 (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start)); 1048 return -EINVAL; 1049 } 1050 1051 idx = get_index(vma->vm_pgoff); 1052 if (idx >= uuari->num_uars) { 1053 mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n", 1054 idx, uuari->num_uars); 1055 return -EINVAL; 1056 } 1057 1058 pfn = uar_index2pfn(dev, uuari->uars[idx].index); 1059 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx, 1060 (unsigned long long)pfn); 1061 1062 vma->vm_page_prot = prot; 1063 if (io_remap_pfn_range(vma, vma->vm_start, pfn, 1064 PAGE_SIZE, vma->vm_page_prot)) { 1065 mlx5_ib_err(dev, "io remap failed\n"); 1066 return -EAGAIN; 1067 } 1068 1069 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC", 1070 (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT); 1071 1072 return 0; 1073 } 1074 1075 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 1076 { 1077 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1078 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1079 struct mlx5_uuar_info *uuari = &context->uuari; 1080 unsigned long command; 1081 1082 command = get_command(vma->vm_pgoff); 1083 switch (command) { 1084 case MLX5_IB_MMAP_REGULAR_PAGE: 1085 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot), 1086 true, 1087 uuari, dev, context); 1088 1089 break; 1090 1091 case MLX5_IB_MMAP_WC_PAGE: 1092 return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot), 1093 true, uuari, dev, context); 1094 break; 1095 1096 case MLX5_IB_MMAP_NC_PAGE: 1097 return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot), 1098 false, uuari, dev, context); 1099 break; 1100 1101 default: 1102 return -EINVAL; 1103 } 1104 1105 return 0; 1106 } 1107 1108 static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn) 1109 { 1110 struct mlx5_create_mkey_mbox_in *in; 1111 struct mlx5_mkey_seg *seg; 1112 struct mlx5_core_mr mr; 1113 int err; 1114 1115 in = kzalloc(sizeof(*in), GFP_KERNEL); 1116 if (!in) 1117 return -ENOMEM; 1118 1119 seg = &in->seg; 1120 seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA; 1121 seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); 1122 seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1123 seg->start_addr = 0; 1124 1125 err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in), 1126 NULL, NULL, NULL); 1127 if (err) { 1128 mlx5_ib_warn(dev, "failed to create mkey, %d\n", err); 1129 goto err_in; 1130 } 1131 1132 kfree(in); 1133 *key = mr.key; 1134 1135 return 0; 1136 1137 err_in: 1138 kfree(in); 1139 1140 return err; 1141 } 1142 1143 static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key) 1144 { 1145 struct mlx5_core_mr mr; 1146 int err; 1147 1148 memset(&mr, 0, sizeof(mr)); 1149 mr.key = key; 1150 err = mlx5_core_destroy_mkey(dev->mdev, &mr); 1151 if (err) 1152 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key); 1153 } 1154 1155 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, 1156 struct ib_ucontext *context, 1157 struct ib_udata *udata) 1158 { 1159 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1160 struct mlx5_ib_alloc_pd_resp resp; 1161 struct mlx5_ib_pd *pd; 1162 int err; 1163 1164 pd = kmalloc(sizeof(*pd), GFP_KERNEL); 1165 if (!pd) 1166 return ERR_PTR(-ENOMEM); 1167 1168 err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); 1169 if (err) { 1170 mlx5_ib_warn(dev, "pd alloc failed\n"); 
1171 kfree(pd); 1172 return ERR_PTR(err); 1173 } 1174 1175 if (context) { 1176 resp.pdn = pd->pdn; 1177 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 1178 mlx5_ib_err(dev, "copy failed\n"); 1179 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); 1180 kfree(pd); 1181 return ERR_PTR(-EFAULT); 1182 } 1183 } else { 1184 err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn); 1185 if (err) { 1186 mlx5_ib_err(dev, "alloc mkey failed\n"); 1187 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); 1188 kfree(pd); 1189 return ERR_PTR(err); 1190 } 1191 } 1192 1193 return &pd->ibpd; 1194 } 1195 1196 static int mlx5_ib_dealloc_pd(struct ib_pd *pd) 1197 { 1198 struct mlx5_ib_dev *mdev = to_mdev(pd->device); 1199 struct mlx5_ib_pd *mpd = to_mpd(pd); 1200 1201 if (!pd->uobject) 1202 free_pa_mkey(mdev, mpd->pa_lkey); 1203 1204 mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); 1205 kfree(mpd); 1206 1207 return 0; 1208 } 1209 1210 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 1211 { 1212 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 1213 int err; 1214 1215 if (ibqp->qp_type == IB_QPT_RAW_PACKET) 1216 err = -EOPNOTSUPP; 1217 else 1218 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); 1219 if (err) 1220 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", 1221 ibqp->qp_num, gid->raw); 1222 1223 return err; 1224 } 1225 1226 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 1227 { 1228 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 1229 int err; 1230 1231 if (ibqp->qp_type == IB_QPT_RAW_PACKET) 1232 err = -EOPNOTSUPP; 1233 else 1234 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); 1235 if (err) 1236 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", 1237 ibqp->qp_num, gid->raw); 1238 1239 return err; 1240 } 1241 1242 static int init_node_data(struct mlx5_ib_dev *dev) 1243 { 1244 int err; 1245 1246 err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); 1247 if (err) 1248 return err; 1249 1250 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); 1251 } 1252 1253 static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr, 1254 char *buf) 1255 { 1256 struct mlx5_ib_dev *dev = 1257 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1258 1259 return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages); 1260 } 1261 1262 static ssize_t show_reg_pages(struct device *device, 1263 struct device_attribute *attr, char *buf) 1264 { 1265 struct mlx5_ib_dev *dev = 1266 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1267 1268 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 1269 } 1270 1271 static ssize_t show_hca(struct device *device, struct device_attribute *attr, 1272 char *buf) 1273 { 1274 struct mlx5_ib_dev *dev = 1275 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1276 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 1277 } 1278 1279 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, 1280 char *buf) 1281 { 1282 struct mlx5_ib_dev *dev = 1283 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1284 return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev), 1285 fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); 1286 } 1287 1288 static ssize_t show_rev(struct device *device, struct device_attribute *attr, 1289 char *buf) 1290 { 1291 struct mlx5_ib_dev *dev = 1292 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1293 return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision); 1294 } 1295 1296 static 
ssize_t show_board(struct device *device, struct device_attribute *attr, 1297 char *buf) 1298 { 1299 struct mlx5_ib_dev *dev = 1300 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 1301 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 1302 dev->mdev->board_id); 1303 } 1304 1305 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1306 static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); 1307 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 1308 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1309 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); 1310 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); 1311 1312 static struct device_attribute *mlx5_class_attributes[] = { 1313 &dev_attr_hw_rev, 1314 &dev_attr_fw_ver, 1315 &dev_attr_hca_type, 1316 &dev_attr_board_id, 1317 &dev_attr_fw_pages, 1318 &dev_attr_reg_pages, 1319 }; 1320 1321 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) 1322 { 1323 struct mlx5_ib_qp *mqp; 1324 struct mlx5_ib_cq *send_mcq, *recv_mcq; 1325 struct mlx5_core_cq *mcq; 1326 struct list_head cq_armed_list; 1327 unsigned long flags_qp; 1328 unsigned long flags_cq; 1329 unsigned long flags; 1330 1331 mlx5_ib_warn(ibdev, " started\n"); 1332 INIT_LIST_HEAD(&cq_armed_list); 1333 1334 /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ 1335 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); 1336 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { 1337 spin_lock_irqsave(&mqp->sq.lock, flags_qp); 1338 if (mqp->sq.tail != mqp->sq.head) { 1339 send_mcq = to_mcq(mqp->ibqp.send_cq); 1340 spin_lock_irqsave(&send_mcq->lock, flags_cq); 1341 if (send_mcq->mcq.comp && 1342 mqp->ibqp.send_cq->comp_handler) { 1343 if (!send_mcq->mcq.reset_notify_added) { 1344 send_mcq->mcq.reset_notify_added = 1; 1345 list_add_tail(&send_mcq->mcq.reset_notify, 1346 &cq_armed_list); 1347 } 1348 } 1349 spin_unlock_irqrestore(&send_mcq->lock, flags_cq); 1350 } 1351 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); 1352 spin_lock_irqsave(&mqp->rq.lock, flags_qp); 1353 /* no handling is needed for SRQ */ 1354 if (!mqp->ibqp.srq) { 1355 if (mqp->rq.tail != mqp->rq.head) { 1356 recv_mcq = to_mcq(mqp->ibqp.recv_cq); 1357 spin_lock_irqsave(&recv_mcq->lock, flags_cq); 1358 if (recv_mcq->mcq.comp && 1359 mqp->ibqp.recv_cq->comp_handler) { 1360 if (!recv_mcq->mcq.reset_notify_added) { 1361 recv_mcq->mcq.reset_notify_added = 1; 1362 list_add_tail(&recv_mcq->mcq.reset_notify, 1363 &cq_armed_list); 1364 } 1365 } 1366 spin_unlock_irqrestore(&recv_mcq->lock, 1367 flags_cq); 1368 } 1369 } 1370 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); 1371 } 1372 /*At that point all inflight post send were put to be executed as of we 1373 * lock/unlock above locks Now need to arm all involved CQs. 
1374 */ 1375 list_for_each_entry(mcq, &cq_armed_list, reset_notify) { 1376 mcq->comp(mcq); 1377 } 1378 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); 1379 mlx5_ib_warn(ibdev, " ended\n"); 1380 return; 1381 } 1382 1383 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 1384 enum mlx5_dev_event event, unsigned long param) 1385 { 1386 struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; 1387 struct ib_event ibev; 1388 1389 u8 port = 0; 1390 1391 switch (event) { 1392 case MLX5_DEV_EVENT_SYS_ERROR: 1393 ibdev->ib_active = false; 1394 ibev.event = IB_EVENT_DEVICE_FATAL; 1395 mlx5_ib_handle_internal_error(ibdev); 1396 break; 1397 1398 case MLX5_DEV_EVENT_PORT_UP: 1399 ibev.event = IB_EVENT_PORT_ACTIVE; 1400 port = (u8)param; 1401 break; 1402 1403 case MLX5_DEV_EVENT_PORT_DOWN: 1404 case MLX5_DEV_EVENT_PORT_INITIALIZED: 1405 ibev.event = IB_EVENT_PORT_ERR; 1406 port = (u8)param; 1407 break; 1408 1409 case MLX5_DEV_EVENT_LID_CHANGE: 1410 ibev.event = IB_EVENT_LID_CHANGE; 1411 port = (u8)param; 1412 break; 1413 1414 case MLX5_DEV_EVENT_PKEY_CHANGE: 1415 ibev.event = IB_EVENT_PKEY_CHANGE; 1416 port = (u8)param; 1417 break; 1418 1419 case MLX5_DEV_EVENT_GUID_CHANGE: 1420 ibev.event = IB_EVENT_GID_CHANGE; 1421 port = (u8)param; 1422 break; 1423 1424 case MLX5_DEV_EVENT_CLIENT_REREG: 1425 ibev.event = IB_EVENT_CLIENT_REREGISTER; 1426 port = (u8)param; 1427 break; 1428 1429 default: 1430 break; 1431 } 1432 1433 ibev.device = &ibdev->ib_dev; 1434 ibev.element.port_num = port; 1435 1436 if ((event != MLX5_DEV_EVENT_SYS_ERROR) && 1437 (port < 1 || port > ibdev->num_ports)) { 1438 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); 1439 return; 1440 } 1441 1442 if (ibdev->ib_active) 1443 ib_dispatch_event(&ibev); 1444 } 1445 1446 static void get_ext_port_caps(struct mlx5_ib_dev *dev) 1447 { 1448 int port; 1449 1450 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) 1451 mlx5_query_ext_port_caps(dev, port); 1452 } 1453 1454 static void config_atomic_responder(struct mlx5_ib_dev *dev, 1455 struct ib_device_attr *props) 1456 { 1457 enum ib_atomic_cap cap = props->atomic_cap; 1458 1459 #if 0 1460 if (cap == IB_ATOMIC_HCA || 1461 cap == IB_ATOMIC_GLOB) 1462 #endif 1463 dev->enable_atomic_resp = 1; 1464 1465 dev->atomic_cap = cap; 1466 } 1467 1468 enum mlx5_addr_align { 1469 MLX5_ADDR_ALIGN_0 = 0, 1470 MLX5_ADDR_ALIGN_64 = 64, 1471 MLX5_ADDR_ALIGN_128 = 128, 1472 }; 1473 1474 static int get_port_caps(struct mlx5_ib_dev *dev) 1475 { 1476 struct ib_device_attr *dprops = NULL; 1477 struct ib_port_attr *pprops = NULL; 1478 int err = -ENOMEM; 1479 int port; 1480 1481 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); 1482 if (!pprops) 1483 goto out; 1484 1485 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); 1486 if (!dprops) 1487 goto out; 1488 1489 err = mlx5_ib_query_device(&dev->ib_dev, dprops); 1490 if (err) { 1491 mlx5_ib_warn(dev, "query_device failed %d\n", err); 1492 goto out; 1493 } 1494 config_atomic_responder(dev, dprops); 1495 1496 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { 1497 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); 1498 if (err) { 1499 mlx5_ib_warn(dev, "query_port %d failed %d\n", 1500 port, err); 1501 break; 1502 } 1503 dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys; 1504 dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len; 1505 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", 1506 dprops->max_pkeys, pprops->gid_tbl_len); 1507 } 1508 1509 out: 1510 kfree(pprops); 
1511 kfree(dprops); 1512 1513 return err; 1514 } 1515 1516 static void destroy_umrc_res(struct mlx5_ib_dev *dev) 1517 { 1518 int err; 1519 1520 err = mlx5_mr_cache_cleanup(dev); 1521 if (err) 1522 mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 1523 1524 ib_dereg_mr(dev->umrc.mr); 1525 ib_dealloc_pd(dev->umrc.pd); 1526 } 1527 1528 enum { 1529 MAX_UMR_WR = 128, 1530 }; 1531 1532 static int create_umr_res(struct mlx5_ib_dev *dev) 1533 { 1534 struct ib_pd *pd; 1535 struct ib_mr *mr; 1536 int ret; 1537 1538 pd = ib_alloc_pd(&dev->ib_dev); 1539 if (IS_ERR(pd)) { 1540 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 1541 ret = PTR_ERR(pd); 1542 goto error_0; 1543 } 1544 1545 mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); 1546 if (IS_ERR(mr)) { 1547 mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n"); 1548 ret = PTR_ERR(mr); 1549 goto error_1; 1550 } 1551 1552 dev->umrc.mr = mr; 1553 dev->umrc.pd = pd; 1554 1555 ret = mlx5_mr_cache_init(dev); 1556 if (ret) { 1557 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); 1558 goto error_4; 1559 } 1560 1561 return 0; 1562 1563 error_4: 1564 ib_dereg_mr(mr); 1565 error_1: 1566 ib_dealloc_pd(pd); 1567 error_0: 1568 return ret; 1569 } 1570 1571 static int create_dev_resources(struct mlx5_ib_resources *devr) 1572 { 1573 struct ib_srq_init_attr attr; 1574 struct mlx5_ib_dev *dev; 1575 int ret = 0; 1576 struct ib_cq_init_attr cq_attr = { .cqe = 1 }; 1577 1578 dev = container_of(devr, struct mlx5_ib_dev, devr); 1579 1580 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL); 1581 if (IS_ERR(devr->p0)) { 1582 ret = PTR_ERR(devr->p0); 1583 goto error0; 1584 } 1585 devr->p0->device = &dev->ib_dev; 1586 devr->p0->uobject = NULL; 1587 atomic_set(&devr->p0->usecnt, 0); 1588 1589 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL); 1590 if (IS_ERR(devr->c0)) { 1591 ret = PTR_ERR(devr->c0); 1592 goto error1; 1593 } 1594 devr->c0->device = &dev->ib_dev; 1595 devr->c0->uobject = NULL; 1596 devr->c0->comp_handler = NULL; 1597 devr->c0->event_handler = NULL; 1598 devr->c0->cq_context = NULL; 1599 atomic_set(&devr->c0->usecnt, 0); 1600 1601 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); 1602 if (IS_ERR(devr->x0)) { 1603 ret = PTR_ERR(devr->x0); 1604 goto error2; 1605 } 1606 devr->x0->device = &dev->ib_dev; 1607 devr->x0->inode = NULL; 1608 atomic_set(&devr->x0->usecnt, 0); 1609 mutex_init(&devr->x0->tgt_qp_mutex); 1610 INIT_LIST_HEAD(&devr->x0->tgt_qp_list); 1611 1612 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL); 1613 if (IS_ERR(devr->x1)) { 1614 ret = PTR_ERR(devr->x1); 1615 goto error3; 1616 } 1617 devr->x1->device = &dev->ib_dev; 1618 devr->x1->inode = NULL; 1619 atomic_set(&devr->x1->usecnt, 0); 1620 mutex_init(&devr->x1->tgt_qp_mutex); 1621 INIT_LIST_HEAD(&devr->x1->tgt_qp_list); 1622 1623 memset(&attr, 0, sizeof(attr)); 1624 attr.attr.max_sge = 1; 1625 attr.attr.max_wr = 1; 1626 attr.srq_type = IB_SRQT_XRC; 1627 attr.ext.xrc.cq = devr->c0; 1628 attr.ext.xrc.xrcd = devr->x0; 1629 1630 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL); 1631 if (IS_ERR(devr->s0)) { 1632 ret = PTR_ERR(devr->s0); 1633 goto error4; 1634 } 1635 devr->s0->device = &dev->ib_dev; 1636 devr->s0->pd = devr->p0; 1637 devr->s0->uobject = NULL; 1638 devr->s0->event_handler = NULL; 1639 devr->s0->srq_context = NULL; 1640 devr->s0->srq_type = IB_SRQT_XRC; 1641 devr->s0->ext.xrc.xrcd = devr->x0; 1642 devr->s0->ext.xrc.cq = devr->c0; 1643 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt); 1644 atomic_inc(&devr->s0->ext.xrc.cq->usecnt); 1645 
atomic_inc(&devr->p0->usecnt); 1646 atomic_set(&devr->s0->usecnt, 0); 1647 1648 memset(&attr, 0, sizeof(attr)); 1649 attr.attr.max_sge = 1; 1650 attr.attr.max_wr = 1; 1651 attr.srq_type = IB_SRQT_BASIC; 1652 devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL); 1653 if (IS_ERR(devr->s1)) { 1654 ret = PTR_ERR(devr->s1); 1655 goto error5; 1656 } 1657 devr->s1->device = &dev->ib_dev; 1658 devr->s1->pd = devr->p0; 1659 devr->s1->uobject = NULL; 1660 devr->s1->event_handler = NULL; 1661 devr->s1->srq_context = NULL; 1662 devr->s1->srq_type = IB_SRQT_BASIC; 1663 devr->s1->ext.xrc.cq = devr->c0; 1664 atomic_inc(&devr->p0->usecnt); 1665 atomic_set(&devr->s1->usecnt, 0); 1666 1667 return 0; 1668 1669 error5: 1670 mlx5_ib_destroy_srq(devr->s0); 1671 error4: 1672 mlx5_ib_dealloc_xrcd(devr->x1); 1673 error3: 1674 mlx5_ib_dealloc_xrcd(devr->x0); 1675 error2: 1676 mlx5_ib_destroy_cq(devr->c0); 1677 error1: 1678 mlx5_ib_dealloc_pd(devr->p0); 1679 error0: 1680 return ret; 1681 } 1682 1683 static void destroy_dev_resources(struct mlx5_ib_resources *devr) 1684 { 1685 mlx5_ib_destroy_srq(devr->s1); 1686 mlx5_ib_destroy_srq(devr->s0); 1687 mlx5_ib_dealloc_xrcd(devr->x0); 1688 mlx5_ib_dealloc_xrcd(devr->x1); 1689 mlx5_ib_destroy_cq(devr->c0); 1690 mlx5_ib_dealloc_pd(devr->p0); 1691 } 1692 1693 static void enable_dc_tracer(struct mlx5_ib_dev *dev) 1694 { 1695 struct device *device = dev->ib_dev.dma_device; 1696 struct mlx5_dc_tracer *dct = &dev->dctr; 1697 int order; 1698 void *tmp; 1699 int size; 1700 int err; 1701 1702 size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096; 1703 if (size <= PAGE_SIZE) 1704 order = 0; 1705 else 1706 order = 1; 1707 1708 dct->pg = alloc_pages(GFP_KERNEL, order); 1709 if (!dct->pg) { 1710 mlx5_ib_err(dev, "failed to allocate %d pages\n", order); 1711 return; 1712 } 1713 1714 tmp = page_address(dct->pg); 1715 memset(tmp, 0xff, size); 1716 1717 dct->size = size; 1718 dct->order = order; 1719 dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE); 1720 if (dma_mapping_error(device, dct->dma)) { 1721 mlx5_ib_err(dev, "dma mapping error\n"); 1722 goto map_err; 1723 } 1724 1725 err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma); 1726 if (err) { 1727 mlx5_ib_warn(dev, "failed to enable DC tracer\n"); 1728 goto cmd_err; 1729 } 1730 1731 return; 1732 1733 cmd_err: 1734 dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE); 1735 map_err: 1736 __free_pages(dct->pg, dct->order); 1737 dct->pg = NULL; 1738 } 1739 1740 static void disable_dc_tracer(struct mlx5_ib_dev *dev) 1741 { 1742 struct device *device = dev->ib_dev.dma_device; 1743 struct mlx5_dc_tracer *dct = &dev->dctr; 1744 int err; 1745 1746 if (!dct->pg) 1747 return; 1748 1749 err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma); 1750 if (err) { 1751 mlx5_ib_warn(dev, "failed to disable DC tracer\n"); 1752 return; 1753 } 1754 1755 dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE); 1756 __free_pages(dct->pg, dct->order); 1757 dct->pg = NULL; 1758 } 1759 1760 enum { 1761 MLX5_DC_CNAK_SIZE = 128, 1762 MLX5_NUM_BUF_IN_PAGE = PAGE_SIZE / MLX5_DC_CNAK_SIZE, 1763 MLX5_CNAK_TX_CQ_SIGNAL_FACTOR = 128, 1764 MLX5_DC_CNAK_SL = 0, 1765 MLX5_DC_CNAK_VL = 0, 1766 }; 1767 1768 static int init_dc_improvements(struct mlx5_ib_dev *dev) 1769 { 1770 if (!mlx5_core_is_pf(dev->mdev)) 1771 return 0; 1772 1773 if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace))) 1774 return 0; 1775 1776 enable_dc_tracer(dev); 1777 1778 return 0; 1779 } 1780 1781 static void cleanup_dc_improvements(struct mlx5_ib_dev *dev) 1782 { 1783 1784 
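	/*
	 * disable_dc_tracer() is a no-op when the DC CNAK tracer was never
	 * enabled (non-PF devices or missing dc_cnak_trace capability),
	 * because dct->pg remains NULL in that case.
	 */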
disable_dc_tracer(dev); 1785 } 1786 1787 static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num) 1788 { 1789 mlx5_vport_dealloc_q_counter(dev->mdev, 1790 MLX5_INTERFACE_PROTOCOL_IB, 1791 dev->port[port_num].q_cnt_id); 1792 dev->port[port_num].q_cnt_id = 0; 1793 } 1794 1795 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) 1796 { 1797 unsigned int i; 1798 1799 for (i = 0; i < dev->num_ports; i++) 1800 mlx5_ib_dealloc_q_port_counter(dev, i); 1801 } 1802 1803 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) 1804 { 1805 int i; 1806 int ret; 1807 1808 for (i = 0; i < dev->num_ports; i++) { 1809 ret = mlx5_vport_alloc_q_counter(dev->mdev, 1810 MLX5_INTERFACE_PROTOCOL_IB, 1811 &dev->port[i].q_cnt_id); 1812 if (ret) { 1813 mlx5_ib_warn(dev, 1814 "couldn't allocate queue counter for port %d\n", 1815 i + 1); 1816 goto dealloc_counters; 1817 } 1818 } 1819 1820 return 0; 1821 1822 dealloc_counters: 1823 while (--i >= 0) 1824 mlx5_ib_dealloc_q_port_counter(dev, i); 1825 1826 return ret; 1827 } 1828 1829 struct port_attribute { 1830 struct attribute attr; 1831 ssize_t (*show)(struct mlx5_ib_port *, 1832 struct port_attribute *, char *buf); 1833 ssize_t (*store)(struct mlx5_ib_port *, 1834 struct port_attribute *, 1835 const char *buf, size_t count); 1836 }; 1837 1838 struct port_counter_attribute { 1839 struct port_attribute attr; 1840 size_t offset; 1841 }; 1842 1843 static ssize_t port_attr_show(struct kobject *kobj, 1844 struct attribute *attr, char *buf) 1845 { 1846 struct port_attribute *port_attr = 1847 container_of(attr, struct port_attribute, attr); 1848 struct mlx5_ib_port_sysfs_group *p = 1849 container_of(kobj, struct mlx5_ib_port_sysfs_group, 1850 kobj); 1851 struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port, 1852 group); 1853 1854 if (!port_attr->show) 1855 return -EIO; 1856 1857 return port_attr->show(mibport, port_attr, buf); 1858 } 1859 1860 static ssize_t show_port_counter(struct mlx5_ib_port *p, 1861 struct port_attribute *port_attr, 1862 char *buf) 1863 { 1864 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); 1865 struct port_counter_attribute *counter_attr = 1866 container_of(port_attr, struct port_counter_attribute, attr); 1867 void *out; 1868 int ret; 1869 1870 out = mlx5_vzalloc(outlen); 1871 if (!out) 1872 return -ENOMEM; 1873 1874 ret = mlx5_vport_query_q_counter(p->dev->mdev, 1875 p->q_cnt_id, 0, 1876 out, outlen); 1877 if (ret) 1878 goto free; 1879 1880 ret = sprintf(buf, "%d\n", 1881 be32_to_cpu(*(__be32 *)(out + counter_attr->offset))); 1882 1883 free: 1884 kfree(out); 1885 return ret; 1886 } 1887 1888 #define PORT_COUNTER_ATTR(_name) \ 1889 struct port_counter_attribute port_counter_attr_##_name = { \ 1890 .attr = __ATTR(_name, S_IRUGO, show_port_counter, NULL), \ 1891 .offset = MLX5_BYTE_OFF(query_q_counter_out, _name) \ 1892 } 1893 1894 static PORT_COUNTER_ATTR(rx_write_requests); 1895 static PORT_COUNTER_ATTR(rx_read_requests); 1896 static PORT_COUNTER_ATTR(rx_atomic_requests); 1897 static PORT_COUNTER_ATTR(rx_dct_connect); 1898 static PORT_COUNTER_ATTR(out_of_buffer); 1899 static PORT_COUNTER_ATTR(out_of_sequence); 1900 static PORT_COUNTER_ATTR(duplicate_request); 1901 static PORT_COUNTER_ATTR(rnr_nak_retry_err); 1902 static PORT_COUNTER_ATTR(packet_seq_err); 1903 static PORT_COUNTER_ATTR(implied_nak_seq_err); 1904 static PORT_COUNTER_ATTR(local_ack_timeout_err); 1905 1906 static struct attribute *counter_attrs[] = { 1907 &port_counter_attr_rx_write_requests.attr.attr, 1908 
&port_counter_attr_rx_read_requests.attr.attr, 1909 &port_counter_attr_rx_atomic_requests.attr.attr, 1910 &port_counter_attr_rx_dct_connect.attr.attr, 1911 &port_counter_attr_out_of_buffer.attr.attr, 1912 &port_counter_attr_out_of_sequence.attr.attr, 1913 &port_counter_attr_duplicate_request.attr.attr, 1914 &port_counter_attr_rnr_nak_retry_err.attr.attr, 1915 &port_counter_attr_packet_seq_err.attr.attr, 1916 &port_counter_attr_implied_nak_seq_err.attr.attr, 1917 &port_counter_attr_local_ack_timeout_err.attr.attr, 1918 NULL 1919 }; 1920 1921 static struct attribute_group port_counters_group = { 1922 .name = "counters", 1923 .attrs = counter_attrs 1924 }; 1925 1926 static const struct sysfs_ops port_sysfs_ops = { 1927 .show = port_attr_show 1928 }; 1929 1930 static struct kobj_type port_type = { 1931 .sysfs_ops = &port_sysfs_ops, 1932 }; 1933 1934 static int add_port_attrs(struct mlx5_ib_dev *dev, 1935 struct kobject *parent, 1936 struct mlx5_ib_port_sysfs_group *port, 1937 u8 port_num) 1938 { 1939 int ret; 1940 1941 ret = kobject_init_and_add(&port->kobj, &port_type, 1942 parent, 1943 "%d", port_num); 1944 if (ret) 1945 return ret; 1946 1947 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && 1948 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { 1949 ret = sysfs_create_group(&port->kobj, &port_counters_group); 1950 if (ret) 1951 goto put_kobj; 1952 } 1953 1954 port->enabled = true; 1955 return ret; 1956 1957 put_kobj: 1958 kobject_put(&port->kobj); 1959 return ret; 1960 } 1961 1962 static void destroy_ports_attrs(struct mlx5_ib_dev *dev, 1963 unsigned int num_ports) 1964 { 1965 unsigned int i; 1966 1967 for (i = 0; i < num_ports; i++) { 1968 struct mlx5_ib_port_sysfs_group *port = 1969 &dev->port[i].group; 1970 1971 if (!port->enabled) 1972 continue; 1973 1974 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && 1975 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) 1976 sysfs_remove_group(&port->kobj, 1977 &port_counters_group); 1978 kobject_put(&port->kobj); 1979 port->enabled = false; 1980 } 1981 1982 if (dev->ports_parent) { 1983 kobject_put(dev->ports_parent); 1984 dev->ports_parent = NULL; 1985 } 1986 } 1987 1988 static int create_port_attrs(struct mlx5_ib_dev *dev) 1989 { 1990 int ret = 0; 1991 unsigned int i = 0; 1992 struct device *device = &dev->ib_dev.dev; 1993 1994 dev->ports_parent = kobject_create_and_add("mlx5_ports", 1995 &device->kobj); 1996 if (!dev->ports_parent) 1997 return -ENOMEM; 1998 1999 for (i = 0; i < dev->num_ports; i++) { 2000 ret = add_port_attrs(dev, 2001 dev->ports_parent, 2002 &dev->port[i].group, 2003 i + 1); 2004 2005 if (ret) 2006 goto _destroy_ports_attrs; 2007 } 2008 2009 return 0; 2010 2011 _destroy_ports_attrs: 2012 destroy_ports_attrs(dev, i); 2013 return ret; 2014 } 2015 2016 static void *mlx5_ib_add(struct mlx5_core_dev *mdev) 2017 { 2018 struct mlx5_ib_dev *dev; 2019 int err; 2020 int i; 2021 2022 printk_once(KERN_INFO "%s", mlx5_version); 2023 2024 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); 2025 if (!dev) 2026 return NULL; 2027 2028 dev->mdev = mdev; 2029 2030 dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port), 2031 GFP_KERNEL); 2032 if (!dev->port) 2033 goto err_dealloc; 2034 2035 for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) { 2036 dev->port[i].dev = dev; 2037 dev->port[i].port_num = i; 2038 dev->port[i].port_gone = 0; 2039 memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table)); 2040 } 2041 2042 err = get_port_caps(dev); 2043 if (err) 2044 goto err_free_port; 2045 2046 if (mlx5_use_mad_ifc(dev)) 
	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		if (MLX5_CAP_GEN(mdev, roce)) {
			err = mlx5_nic_vport_enable_roce(mdev);
			if (err)
				goto err_free_port;
		} else {
			goto err_free_port;
		}
	}

	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);

	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.owner = THIS_MODULE;
	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey = mdev->special_contexts.resd_lkey;
	dev->num_ports = MLX5_CAP_GEN(mdev, num_ports);
	dev->ib_dev.phys_port_cnt = dev->num_ports;
	dev->ib_dev.num_comp_vectors =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.dma_device = &mdev->pdev->dev;

	dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);

	dev->ib_dev.query_device = mlx5_ib_query_device;
	dev->ib_dev.query_port = mlx5_ib_query_port;
	dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer;
	dev->ib_dev.get_netdev = mlx5_ib_get_netdev;
	dev->ib_dev.query_gid = mlx5_ib_query_gid;
	dev->ib_dev.query_pkey = mlx5_ib_query_pkey;
	dev->ib_dev.modify_device = mlx5_ib_modify_device;
	dev->ib_dev.modify_port = mlx5_ib_modify_port;
	dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext;
	dev->ib_dev.mmap = mlx5_ib_mmap;
	dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd;
	dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd;
	dev->ib_dev.create_ah = mlx5_ib_create_ah;
	dev->ib_dev.query_ah = mlx5_ib_query_ah;
	dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah;
	dev->ib_dev.create_srq = mlx5_ib_create_srq;
	dev->ib_dev.modify_srq = mlx5_ib_modify_srq;
	dev->ib_dev.query_srq = mlx5_ib_query_srq;
	dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq;
	dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv;
	dev->ib_dev.create_qp = mlx5_ib_create_qp;
	dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
	dev->ib_dev.query_qp = mlx5_ib_query_qp;
	dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
	dev->ib_dev.post_send = mlx5_ib_post_send;
	dev->ib_dev.post_recv = mlx5_ib_post_recv;
	dev->ib_dev.create_cq = mlx5_ib_create_cq;
	dev->ib_dev.modify_cq = mlx5_ib_modify_cq;
	dev->ib_dev.resize_cq = mlx5_ib_resize_cq;
	dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq;
	dev->ib_dev.poll_cq = mlx5_ib_poll_cq;
	dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq;
	dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr;
	dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr;
	dev->ib_dev.reg_phys_mr = mlx5_ib_reg_phys_mr;
	dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr;
	dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach;
	dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach;
	dev->ib_dev.process_mad = mlx5_ib_process_mad;
	dev->ib_dev.alloc_fast_reg_mr = mlx5_ib_alloc_fast_reg_mr;
	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
	dev->ib_dev.free_fast_reg_page_list = mlx5_ib_free_fast_reg_page_list;

	if (MLX5_CAP_GEN(mdev, xrc)) {
		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	err = init_node_data(dev);
	if (err)
		goto err_disable_roce;

	mutex_init(&dev->cap_mask_mutex);
	INIT_LIST_HEAD(&dev->qp_list);
	spin_lock_init(&dev->reset_flow_resource_lock);

	err = create_dev_resources(&dev->devr);
	if (err)
		goto err_disable_roce;

	err = mlx5_ib_alloc_q_counters(dev);
	if (err)
		goto err_odp;

	err = ib_register_device(&dev->ib_dev, NULL);
	if (err)
		goto err_q_cnt;

	err = create_umr_res(dev);
	if (err)
		goto err_dev;

	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB) {
		if (init_dc_improvements(dev))
			mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
	}

	err = create_port_attrs(dev);
	if (err)
		goto err_dc;

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		err = device_create_file(&dev->ib_dev.dev,
					 mlx5_class_attributes[i]);
		if (err)
			goto err_port_attrs;
	}

	if (1) {
		struct thread *rl_thread = NULL;
		struct proc *rl_proc = NULL;

		/* Start one kernel thread per port to run mlx5_ib_roce_port_update(). */
		for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
			(void) kproc_kthread_add(mlx5_ib_roce_port_update,
			    dev->port + i, &rl_proc, &rl_thread,
			    RFHIGHPID, 0, "mlx5-ib-roce-port",
			    "mlx5-ib-roce_port-%d", i);
		}
	}

	dev->ib_active = true;

	return dev;

err_port_attrs:
	destroy_ports_attrs(dev, dev->num_ports);

err_dc:
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	destroy_umrc_res(dev);

err_dev:
	ib_unregister_device(&dev->ib_dev);

err_q_cnt:
	mlx5_ib_dealloc_q_counters(dev);

err_odp:
	destroy_dev_resources(&dev->devr);

err_disable_roce:
	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

err_free_port:
	kfree(dev->port);

err_dealloc:
	ib_dealloc_device((struct ib_device *)dev);

	return NULL;
}

static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	int i;

	/*
	 * Ask the per-port update threads to stop; each thread is expected
	 * to acknowledge by setting port_gone to 2 before exiting.
	 */
	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].port_gone = 1;
		while (dev->port[i].port_gone != 2)
			pause("W", hz);
	}

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		device_remove_file(&dev->ib_dev.dev,
				   mlx5_class_attributes[i]);
	}

	destroy_ports_attrs(dev, dev->num_ports);
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	mlx5_ib_dealloc_q_counters(dev);
	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	destroy_dev_resources(&dev->devr);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

	kfree(dev->port);
	ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
	.add = mlx5_ib_add,
	.remove = mlx5_ib_remove,
	.event = mlx5_ib_event,
	.protocol = MLX5_INTERFACE_PROTOCOL_IB,
};

static int __init mlx5_ib_init(void)
{
	int err;

	if (deprecated_prof_sel != 2)
		printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

	err = mlx5_register_interface(&mlx5_ib_interface);
	if (err)
		goto clean_odp;

	mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
	if (!mlx5_ib_wq) {
		printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
		err = -ENOMEM;
		goto err_unreg;
	}

	return err;

err_unreg:
	mlx5_unregister_interface(&mlx5_ib_interface);

clean_odp:
	return err;
}

static void __exit mlx5_ib_cleanup(void)
{
	destroy_workqueue(mlx5_ib_wq);
	mlx5_unregister_interface(&mlx5_ib_interface);
}

module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);
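
/*
 * Illustrative load sequence (assumption: the module is built and installed
 * as mlx5ib.ko):
 *
 *   kldload mlx5ib
 *
 * mlx5_register_interface() invokes mlx5_ib_add() for each mlx5_core device
 * that is already present and again for devices probed later;
 * mlx5_ib_remove() is the matching teardown callback.
 */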