/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#if defined(CONFIG_X86)
#include <asm/pat.h>
#endif
#include <linux/sched.h>
#include <linux/delay.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_cache.h>
#include <linux/mlx5/port.h>
#include <linux/mlx5/vport.h>
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include <linux/mlx5/fs.h>
#include "mlx5_ib.h"

#define DRIVER_NAME "mlx5_ib"
#define DRIVER_VERSION "2.2-1"
#define DRIVER_RELDATE "Feb 2014"

MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRIVER_VERSION);

static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");

static char mlx5_version[] =
	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";

enum {
	MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
};

static enum rdma_link_layer
mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
{
	switch (port_type_cap) {
	case MLX5_CAP_PORT_TYPE_IB:
		return IB_LINK_LAYER_INFINIBAND;
	case MLX5_CAP_PORT_TYPE_ETH:
		return IB_LINK_LAYER_ETHERNET;
	default:
		return IB_LINK_LAYER_UNSPECIFIED;
	}
}

static enum rdma_link_layer
mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx5_ib_dev *dev = to_mdev(device);
	int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);

	return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
}

static int mlx5_netdev_event(struct notifier_block *this,
			     unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev,
						 roce.nb);

	switch (event) {
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
		write_lock(&ibdev->roce.netdev_lock);
		if (ndev->dev.parent == &ibdev->mdev->pdev->dev)
			ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ?
					     NULL : ndev;
		write_unlock(&ibdev->roce.netdev_lock);
		break;

	case NETDEV_UP:
	case NETDEV_DOWN: {
		struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev);
		struct net_device *upper = NULL;

		if (lag_ndev) {
			upper = netdev_master_upper_dev_get(lag_ndev);
			dev_put(lag_ndev);
		}

		if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev))
		    && ibdev->ib_active) {
			struct ib_event ibev = { };

			ibev.device = &ibdev->ib_dev;
			ibev.event = (event == NETDEV_UP) ?
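			/* Carrier changes on the tracked RoCE netdev are
			 * reported to ULPs as IB port events; this device
			 * model exposes a single port, hence port_num = 1
			 * below.
			 */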
135 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; 136 ibev.element.port_num = 1; 137 ib_dispatch_event(&ibev); 138 } 139 break; 140 } 141 142 default: 143 break; 144 } 145 146 return NOTIFY_DONE; 147 } 148 149 static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, 150 u8 port_num) 151 { 152 struct mlx5_ib_dev *ibdev = to_mdev(device); 153 struct net_device *ndev; 154 155 ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); 156 if (ndev) 157 return ndev; 158 159 /* Ensure ndev does not disappear before we invoke dev_hold() 160 */ 161 read_lock(&ibdev->roce.netdev_lock); 162 ndev = ibdev->roce.netdev; 163 if (ndev) 164 dev_hold(ndev); 165 read_unlock(&ibdev->roce.netdev_lock); 166 167 return ndev; 168 } 169 170 static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, 171 struct ib_port_attr *props) 172 { 173 struct mlx5_ib_dev *dev = to_mdev(device); 174 struct net_device *ndev, *upper; 175 enum ib_mtu ndev_ib_mtu; 176 u16 qkey_viol_cntr; 177 178 memset(props, 0, sizeof(*props)); 179 180 props->port_cap_flags |= IB_PORT_CM_SUP; 181 props->port_cap_flags |= IB_PORT_IP_BASED_GIDS; 182 183 props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev, 184 roce_address_table_size); 185 props->max_mtu = IB_MTU_4096; 186 props->max_msg_sz = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg); 187 props->pkey_tbl_len = 1; 188 props->state = IB_PORT_DOWN; 189 props->phys_state = 3; 190 191 mlx5_query_nic_vport_qkey_viol_cntr(dev->mdev, &qkey_viol_cntr); 192 props->qkey_viol_cntr = qkey_viol_cntr; 193 194 ndev = mlx5_ib_get_netdev(device, port_num); 195 if (!ndev) 196 return 0; 197 198 if (mlx5_lag_is_active(dev->mdev)) { 199 rcu_read_lock(); 200 upper = netdev_master_upper_dev_get_rcu(ndev); 201 if (upper) { 202 dev_put(ndev); 203 ndev = upper; 204 dev_hold(ndev); 205 } 206 rcu_read_unlock(); 207 } 208 209 if (netif_running(ndev) && netif_carrier_ok(ndev)) { 210 props->state = IB_PORT_ACTIVE; 211 props->phys_state = 5; 212 } 213 214 ndev_ib_mtu = iboe_get_mtu(ndev->mtu); 215 216 dev_put(ndev); 217 218 props->active_mtu = min(props->max_mtu, ndev_ib_mtu); 219 220 props->active_width = IB_WIDTH_4X; /* TODO */ 221 props->active_speed = IB_SPEED_QDR; /* TODO */ 222 223 return 0; 224 } 225 226 static void ib_gid_to_mlx5_roce_addr(const union ib_gid *gid, 227 const struct ib_gid_attr *attr, 228 void *mlx5_addr) 229 { 230 #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) 231 char *mlx5_addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, 232 source_l3_address); 233 void *mlx5_addr_mac = MLX5_ADDR_OF(roce_addr_layout, mlx5_addr, 234 source_mac_47_32); 235 236 if (!gid) 237 return; 238 239 ether_addr_copy(mlx5_addr_mac, attr->ndev->dev_addr); 240 241 if (is_vlan_dev(attr->ndev)) { 242 MLX5_SET_RA(mlx5_addr, vlan_valid, 1); 243 MLX5_SET_RA(mlx5_addr, vlan_id, vlan_dev_vlan_id(attr->ndev)); 244 } 245 246 switch (attr->gid_type) { 247 case IB_GID_TYPE_IB: 248 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_1); 249 break; 250 case IB_GID_TYPE_ROCE_UDP_ENCAP: 251 MLX5_SET_RA(mlx5_addr, roce_version, MLX5_ROCE_VERSION_2); 252 break; 253 254 default: 255 WARN_ON(true); 256 } 257 258 if (attr->gid_type != IB_GID_TYPE_IB) { 259 if (ipv6_addr_v4mapped((void *)gid)) 260 MLX5_SET_RA(mlx5_addr, roce_l3_type, 261 MLX5_ROCE_L3_TYPE_IPV4); 262 else 263 MLX5_SET_RA(mlx5_addr, roce_l3_type, 264 MLX5_ROCE_L3_TYPE_IPV6); 265 } 266 267 if ((attr->gid_type == IB_GID_TYPE_IB) || 268 !ipv6_addr_v4mapped((void *)gid)) 269 memcpy(mlx5_addr_l3_addr, gid, sizeof(*gid)); 270 else 271 memcpy(&mlx5_addr_l3_addr[12], 
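		/* IPv4-mapped GID (RoCE v2 over IPv4): only the last four
		 * bytes of the GID carry the IPv4 address, so copy just
		 * those into the tail of the 16-byte source_l3_address.
		 */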
&gid->raw[12], 4); 272 } 273 274 static int set_roce_addr(struct ib_device *device, u8 port_num, 275 unsigned int index, 276 const union ib_gid *gid, 277 const struct ib_gid_attr *attr) 278 { 279 struct mlx5_ib_dev *dev = to_mdev(device); 280 u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; 281 u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; 282 void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); 283 enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); 284 285 if (ll != IB_LINK_LAYER_ETHERNET) 286 return -EINVAL; 287 288 ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); 289 290 MLX5_SET(set_roce_address_in, in, roce_address_index, index); 291 MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); 292 return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); 293 } 294 295 static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, 296 unsigned int index, const union ib_gid *gid, 297 const struct ib_gid_attr *attr, 298 __always_unused void **context) 299 { 300 return set_roce_addr(device, port_num, index, gid, attr); 301 } 302 303 static int mlx5_ib_del_gid(struct ib_device *device, u8 port_num, 304 unsigned int index, __always_unused void **context) 305 { 306 return set_roce_addr(device, port_num, index, NULL, NULL); 307 } 308 309 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, 310 int index) 311 { 312 struct ib_gid_attr attr; 313 union ib_gid gid; 314 315 if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr)) 316 return 0; 317 318 if (!attr.ndev) 319 return 0; 320 321 dev_put(attr.ndev); 322 323 if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP) 324 return 0; 325 326 return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port)); 327 } 328 329 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) 330 { 331 if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) 332 return !MLX5_CAP_GEN(dev->mdev, ib_virt); 333 return 0; 334 } 335 336 enum { 337 MLX5_VPORT_ACCESS_METHOD_MAD, 338 MLX5_VPORT_ACCESS_METHOD_HCA, 339 MLX5_VPORT_ACCESS_METHOD_NIC, 340 }; 341 342 static int mlx5_get_vport_access_method(struct ib_device *ibdev) 343 { 344 if (mlx5_use_mad_ifc(to_mdev(ibdev))) 345 return MLX5_VPORT_ACCESS_METHOD_MAD; 346 347 if (mlx5_ib_port_link_layer(ibdev, 1) == 348 IB_LINK_LAYER_ETHERNET) 349 return MLX5_VPORT_ACCESS_METHOD_NIC; 350 351 return MLX5_VPORT_ACCESS_METHOD_HCA; 352 } 353 354 static void get_atomic_caps(struct mlx5_ib_dev *dev, 355 struct ib_device_attr *props) 356 { 357 u8 tmp; 358 u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations); 359 u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp); 360 u8 atomic_req_8B_endianness_mode = 361 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianess_mode); 362 363 /* Check if HW supports 8 bytes standard atomic operations and capable 364 * of host endianness respond 365 */ 366 tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD; 367 if (((atomic_operations & tmp) == tmp) && 368 (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) && 369 (atomic_req_8B_endianness_mode)) { 370 props->atomic_cap = IB_ATOMIC_HCA; 371 } else { 372 props->atomic_cap = IB_ATOMIC_NONE; 373 } 374 } 375 376 static int mlx5_query_system_image_guid(struct ib_device *ibdev, 377 __be64 *sys_image_guid) 378 { 379 struct mlx5_ib_dev *dev = to_mdev(ibdev); 380 struct mlx5_core_dev *mdev = dev->mdev; 381 u64 tmp; 382 int err; 383 384 switch (mlx5_get_vport_access_method(ibdev)) { 385 case MLX5_VPORT_ACCESS_METHOD_MAD: 386 return 
mlx5_query_mad_ifc_system_image_guid(ibdev, 387 sys_image_guid); 388 389 case MLX5_VPORT_ACCESS_METHOD_HCA: 390 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp); 391 break; 392 393 case MLX5_VPORT_ACCESS_METHOD_NIC: 394 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp); 395 break; 396 397 default: 398 return -EINVAL; 399 } 400 401 if (!err) 402 *sys_image_guid = cpu_to_be64(tmp); 403 404 return err; 405 406 } 407 408 static int mlx5_query_max_pkeys(struct ib_device *ibdev, 409 u16 *max_pkeys) 410 { 411 struct mlx5_ib_dev *dev = to_mdev(ibdev); 412 struct mlx5_core_dev *mdev = dev->mdev; 413 414 switch (mlx5_get_vport_access_method(ibdev)) { 415 case MLX5_VPORT_ACCESS_METHOD_MAD: 416 return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys); 417 418 case MLX5_VPORT_ACCESS_METHOD_HCA: 419 case MLX5_VPORT_ACCESS_METHOD_NIC: 420 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, 421 pkey_table_size)); 422 return 0; 423 424 default: 425 return -EINVAL; 426 } 427 } 428 429 static int mlx5_query_vendor_id(struct ib_device *ibdev, 430 u32 *vendor_id) 431 { 432 struct mlx5_ib_dev *dev = to_mdev(ibdev); 433 434 switch (mlx5_get_vport_access_method(ibdev)) { 435 case MLX5_VPORT_ACCESS_METHOD_MAD: 436 return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id); 437 438 case MLX5_VPORT_ACCESS_METHOD_HCA: 439 case MLX5_VPORT_ACCESS_METHOD_NIC: 440 return mlx5_core_query_vendor_id(dev->mdev, vendor_id); 441 442 default: 443 return -EINVAL; 444 } 445 } 446 447 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, 448 __be64 *node_guid) 449 { 450 u64 tmp; 451 int err; 452 453 switch (mlx5_get_vport_access_method(&dev->ib_dev)) { 454 case MLX5_VPORT_ACCESS_METHOD_MAD: 455 return mlx5_query_mad_ifc_node_guid(dev, node_guid); 456 457 case MLX5_VPORT_ACCESS_METHOD_HCA: 458 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp); 459 break; 460 461 case MLX5_VPORT_ACCESS_METHOD_NIC: 462 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp); 463 break; 464 465 default: 466 return -EINVAL; 467 } 468 469 if (!err) 470 *node_guid = cpu_to_be64(tmp); 471 472 return err; 473 } 474 475 struct mlx5_reg_node_desc { 476 u8 desc[IB_DEVICE_NODE_DESC_MAX]; 477 }; 478 479 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) 480 { 481 struct mlx5_reg_node_desc in; 482 483 if (mlx5_use_mad_ifc(dev)) 484 return mlx5_query_mad_ifc_node_desc(dev, node_desc); 485 486 memset(&in, 0, sizeof(in)); 487 488 return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc, 489 sizeof(struct mlx5_reg_node_desc), 490 MLX5_REG_NODE_DESC, 0, 0); 491 } 492 493 static int mlx5_ib_query_device(struct ib_device *ibdev, 494 struct ib_device_attr *props, 495 struct ib_udata *uhw) 496 { 497 struct mlx5_ib_dev *dev = to_mdev(ibdev); 498 struct mlx5_core_dev *mdev = dev->mdev; 499 int err = -ENOMEM; 500 int max_sq_desc; 501 int max_rq_sg; 502 int max_sq_sg; 503 u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); 504 struct mlx5_ib_query_device_resp resp = {}; 505 size_t resp_len; 506 u64 max_tso; 507 508 resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length); 509 if (uhw->outlen && uhw->outlen < resp_len) 510 return -EINVAL; 511 else 512 resp.response_length = resp_len; 513 514 if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen)) 515 return -EINVAL; 516 517 memset(props, 0, sizeof(*props)); 518 err = mlx5_query_system_image_guid(ibdev, 519 &props->sys_image_guid); 520 if (err) 521 return err; 522 523 err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys); 524 if (err) 525 return err; 526 527 err = 
mlx5_query_vendor_id(ibdev, &props->vendor_id); 528 if (err) 529 return err; 530 531 props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) | 532 (fw_rev_min(dev->mdev) << 16) | 533 fw_rev_sub(dev->mdev); 534 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | 535 IB_DEVICE_PORT_ACTIVE_EVENT | 536 IB_DEVICE_SYS_IMAGE_GUID | 537 IB_DEVICE_RC_RNR_NAK_GEN; 538 539 if (MLX5_CAP_GEN(mdev, pkv)) 540 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; 541 if (MLX5_CAP_GEN(mdev, qkv)) 542 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; 543 if (MLX5_CAP_GEN(mdev, apm)) 544 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; 545 if (MLX5_CAP_GEN(mdev, xrc)) 546 props->device_cap_flags |= IB_DEVICE_XRC; 547 if (MLX5_CAP_GEN(mdev, imaicl)) { 548 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW | 549 IB_DEVICE_MEM_WINDOW_TYPE_2B; 550 props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 551 /* We support 'Gappy' memory registration too */ 552 props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG; 553 } 554 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 555 if (MLX5_CAP_GEN(mdev, sho)) { 556 props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER; 557 /* At this stage no support for signature handover */ 558 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 | 559 IB_PROT_T10DIF_TYPE_2 | 560 IB_PROT_T10DIF_TYPE_3; 561 props->sig_guard_cap = IB_GUARD_T10DIF_CRC | 562 IB_GUARD_T10DIF_CSUM; 563 } 564 if (MLX5_CAP_GEN(mdev, block_lb_mc)) 565 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 566 567 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) { 568 if (MLX5_CAP_ETH(mdev, csum_cap)) 569 props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM; 570 571 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) { 572 max_tso = MLX5_CAP_ETH(mdev, max_lso_cap); 573 if (max_tso) { 574 resp.tso_caps.max_tso = 1 << max_tso; 575 resp.tso_caps.supported_qpts |= 576 1 << IB_QPT_RAW_PACKET; 577 resp.response_length += sizeof(resp.tso_caps); 578 } 579 } 580 581 if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { 582 resp.rss_caps.rx_hash_function = 583 MLX5_RX_HASH_FUNC_TOEPLITZ; 584 resp.rss_caps.rx_hash_fields_mask = 585 MLX5_RX_HASH_SRC_IPV4 | 586 MLX5_RX_HASH_DST_IPV4 | 587 MLX5_RX_HASH_SRC_IPV6 | 588 MLX5_RX_HASH_DST_IPV6 | 589 MLX5_RX_HASH_SRC_PORT_TCP | 590 MLX5_RX_HASH_DST_PORT_TCP | 591 MLX5_RX_HASH_SRC_PORT_UDP | 592 MLX5_RX_HASH_DST_PORT_UDP; 593 resp.response_length += sizeof(resp.rss_caps); 594 } 595 } else { 596 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) 597 resp.response_length += sizeof(resp.tso_caps); 598 if (field_avail(typeof(resp), rss_caps, uhw->outlen)) 599 resp.response_length += sizeof(resp.rss_caps); 600 } 601 602 if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { 603 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 604 props->device_cap_flags |= IB_DEVICE_UD_TSO; 605 } 606 607 if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && 608 MLX5_CAP_ETH(dev->mdev, scatter_fcs)) 609 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS; 610 611 if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS)) 612 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; 613 614 props->vendor_part_id = mdev->pdev->device; 615 props->hw_ver = mdev->pdev->revision; 616 617 props->max_mr_size = ~0ull; 618 props->page_size_cap = ~(min_page_size - 1); 619 props->max_qp = 1 << MLX5_CAP_GEN(mdev, log_max_qp); 620 props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); 621 max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / 622 sizeof(struct mlx5_wqe_data_seg); 623 max_sq_desc = min_t(int, 
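	/* The advertised SGE limits are derived below from the WQE sizes:
	 * the send side uses a descriptor size capped at 512 bytes minus
	 * the ctrl and raddr segments, the receive side divides the RQ WQE
	 * size by the data segment size.
	 */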
MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); 624 max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - 625 sizeof(struct mlx5_wqe_raddr_seg)) / 626 sizeof(struct mlx5_wqe_data_seg); 627 props->max_sge = min(max_rq_sg, max_sq_sg); 628 props->max_sge_rd = MLX5_MAX_SGE_RD; 629 props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); 630 props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1; 631 props->max_mr = 1 << MLX5_CAP_GEN(mdev, log_max_mkey); 632 props->max_pd = 1 << MLX5_CAP_GEN(mdev, log_max_pd); 633 props->max_qp_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp); 634 props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp); 635 props->max_srq = 1 << MLX5_CAP_GEN(mdev, log_max_srq); 636 props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1; 637 props->local_ca_ack_delay = MLX5_CAP_GEN(mdev, local_ca_ack_delay); 638 props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp; 639 props->max_srq_sge = max_rq_sg - 1; 640 props->max_fast_reg_page_list_len = 641 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size); 642 get_atomic_caps(dev, props); 643 props->masked_atomic_cap = IB_ATOMIC_NONE; 644 props->max_mcast_grp = 1 << MLX5_CAP_GEN(mdev, log_max_mcg); 645 props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg); 646 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 647 props->max_mcast_grp; 648 props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ 649 props->max_ah = INT_MAX; 650 props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); 651 props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; 652 653 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 654 if (MLX5_CAP_GEN(mdev, pg)) 655 props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING; 656 props->odp_caps = dev->odp_caps; 657 #endif 658 659 if (MLX5_CAP_GEN(mdev, cd)) 660 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL; 661 662 if (!mlx5_core_is_pf(mdev)) 663 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; 664 665 if (mlx5_ib_port_link_layer(ibdev, 1) == 666 IB_LINK_LAYER_ETHERNET) { 667 props->rss_caps.max_rwq_indirection_tables = 668 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt); 669 props->rss_caps.max_rwq_indirection_table_size = 670 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size); 671 props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; 672 props->max_wq_type_rq = 673 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); 674 } 675 676 if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { 677 resp.cqe_comp_caps.max_num = 678 MLX5_CAP_GEN(dev->mdev, cqe_compression) ? 
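			/* A max_num of 0 tells userspace that CQE
			 * compression is not available on this HCA.
			 */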
679 MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0; 680 resp.cqe_comp_caps.supported_format = 681 MLX5_IB_CQE_RES_FORMAT_HASH | 682 MLX5_IB_CQE_RES_FORMAT_CSUM; 683 resp.response_length += sizeof(resp.cqe_comp_caps); 684 } 685 686 if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) { 687 if (MLX5_CAP_QOS(mdev, packet_pacing) && 688 MLX5_CAP_GEN(mdev, qos)) { 689 resp.packet_pacing_caps.qp_rate_limit_max = 690 MLX5_CAP_QOS(mdev, packet_pacing_max_rate); 691 resp.packet_pacing_caps.qp_rate_limit_min = 692 MLX5_CAP_QOS(mdev, packet_pacing_min_rate); 693 resp.packet_pacing_caps.supported_qpts |= 694 1 << IB_QPT_RAW_PACKET; 695 } 696 resp.response_length += sizeof(resp.packet_pacing_caps); 697 } 698 699 if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, 700 uhw->outlen)) { 701 resp.mlx5_ib_support_multi_pkt_send_wqes = 702 MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); 703 resp.response_length += 704 sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); 705 } 706 707 if (field_avail(typeof(resp), reserved, uhw->outlen)) 708 resp.response_length += sizeof(resp.reserved); 709 710 if (uhw->outlen) { 711 err = ib_copy_to_udata(uhw, &resp, resp.response_length); 712 713 if (err) 714 return err; 715 } 716 717 return 0; 718 } 719 720 enum mlx5_ib_width { 721 MLX5_IB_WIDTH_1X = 1 << 0, 722 MLX5_IB_WIDTH_2X = 1 << 1, 723 MLX5_IB_WIDTH_4X = 1 << 2, 724 MLX5_IB_WIDTH_8X = 1 << 3, 725 MLX5_IB_WIDTH_12X = 1 << 4 726 }; 727 728 static int translate_active_width(struct ib_device *ibdev, u8 active_width, 729 u8 *ib_width) 730 { 731 struct mlx5_ib_dev *dev = to_mdev(ibdev); 732 int err = 0; 733 734 if (active_width & MLX5_IB_WIDTH_1X) { 735 *ib_width = IB_WIDTH_1X; 736 } else if (active_width & MLX5_IB_WIDTH_2X) { 737 mlx5_ib_dbg(dev, "active_width %d is not supported by IB spec\n", 738 (int)active_width); 739 err = -EINVAL; 740 } else if (active_width & MLX5_IB_WIDTH_4X) { 741 *ib_width = IB_WIDTH_4X; 742 } else if (active_width & MLX5_IB_WIDTH_8X) { 743 *ib_width = IB_WIDTH_8X; 744 } else if (active_width & MLX5_IB_WIDTH_12X) { 745 *ib_width = IB_WIDTH_12X; 746 } else { 747 mlx5_ib_dbg(dev, "Invalid active_width %d\n", 748 (int)active_width); 749 err = -EINVAL; 750 } 751 752 return err; 753 } 754 755 static int mlx5_mtu_to_ib_mtu(int mtu) 756 { 757 switch (mtu) { 758 case 256: return 1; 759 case 512: return 2; 760 case 1024: return 3; 761 case 2048: return 4; 762 case 4096: return 5; 763 default: 764 pr_warn("invalid mtu\n"); 765 return -1; 766 } 767 } 768 769 enum ib_max_vl_num { 770 __IB_MAX_VL_0 = 1, 771 __IB_MAX_VL_0_1 = 2, 772 __IB_MAX_VL_0_3 = 3, 773 __IB_MAX_VL_0_7 = 4, 774 __IB_MAX_VL_0_14 = 5, 775 }; 776 777 enum mlx5_vl_hw_cap { 778 MLX5_VL_HW_0 = 1, 779 MLX5_VL_HW_0_1 = 2, 780 MLX5_VL_HW_0_2 = 3, 781 MLX5_VL_HW_0_3 = 4, 782 MLX5_VL_HW_0_4 = 5, 783 MLX5_VL_HW_0_5 = 6, 784 MLX5_VL_HW_0_6 = 7, 785 MLX5_VL_HW_0_7 = 8, 786 MLX5_VL_HW_0_14 = 15 787 }; 788 789 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap, 790 u8 *max_vl_num) 791 { 792 switch (vl_hw_cap) { 793 case MLX5_VL_HW_0: 794 *max_vl_num = __IB_MAX_VL_0; 795 break; 796 case MLX5_VL_HW_0_1: 797 *max_vl_num = __IB_MAX_VL_0_1; 798 break; 799 case MLX5_VL_HW_0_3: 800 *max_vl_num = __IB_MAX_VL_0_3; 801 break; 802 case MLX5_VL_HW_0_7: 803 *max_vl_num = __IB_MAX_VL_0_7; 804 break; 805 case MLX5_VL_HW_0_14: 806 *max_vl_num = __IB_MAX_VL_0_14; 807 break; 808 809 default: 810 return -EINVAL; 811 } 812 813 return 0; 814 } 815 816 static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, 817 struct 
ib_port_attr *props) 818 { 819 struct mlx5_ib_dev *dev = to_mdev(ibdev); 820 struct mlx5_core_dev *mdev = dev->mdev; 821 struct mlx5_hca_vport_context *rep; 822 u16 max_mtu; 823 u16 oper_mtu; 824 int err; 825 u8 ib_link_width_oper; 826 u8 vl_hw_cap; 827 828 rep = kzalloc(sizeof(*rep), GFP_KERNEL); 829 if (!rep) { 830 err = -ENOMEM; 831 goto out; 832 } 833 834 memset(props, 0, sizeof(*props)); 835 836 err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep); 837 if (err) 838 goto out; 839 840 props->lid = rep->lid; 841 props->lmc = rep->lmc; 842 props->sm_lid = rep->sm_lid; 843 props->sm_sl = rep->sm_sl; 844 props->state = rep->vport_state; 845 props->phys_state = rep->port_physical_state; 846 props->port_cap_flags = rep->cap_mask1; 847 props->gid_tbl_len = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size)); 848 props->max_msg_sz = 1 << MLX5_CAP_GEN(mdev, log_max_msg); 849 props->pkey_tbl_len = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size)); 850 props->bad_pkey_cntr = rep->pkey_violation_counter; 851 props->qkey_viol_cntr = rep->qkey_violation_counter; 852 props->subnet_timeout = rep->subnet_timeout; 853 props->init_type_reply = rep->init_type_reply; 854 props->grh_required = rep->grh_required; 855 856 err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port); 857 if (err) 858 goto out; 859 860 err = translate_active_width(ibdev, ib_link_width_oper, 861 &props->active_width); 862 if (err) 863 goto out; 864 err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); 865 if (err) 866 goto out; 867 868 mlx5_query_port_max_mtu(mdev, &max_mtu, port); 869 870 props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu); 871 872 mlx5_query_port_oper_mtu(mdev, &oper_mtu, port); 873 874 props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu); 875 876 err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port); 877 if (err) 878 goto out; 879 880 err = translate_max_vl_num(ibdev, vl_hw_cap, 881 &props->max_vl_num); 882 out: 883 kfree(rep); 884 return err; 885 } 886 887 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, 888 struct ib_port_attr *props) 889 { 890 switch (mlx5_get_vport_access_method(ibdev)) { 891 case MLX5_VPORT_ACCESS_METHOD_MAD: 892 return mlx5_query_mad_ifc_port(ibdev, port, props); 893 894 case MLX5_VPORT_ACCESS_METHOD_HCA: 895 return mlx5_query_hca_port(ibdev, port, props); 896 897 case MLX5_VPORT_ACCESS_METHOD_NIC: 898 return mlx5_query_port_roce(ibdev, port, props); 899 900 default: 901 return -EINVAL; 902 } 903 } 904 905 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 906 union ib_gid *gid) 907 { 908 struct mlx5_ib_dev *dev = to_mdev(ibdev); 909 struct mlx5_core_dev *mdev = dev->mdev; 910 911 switch (mlx5_get_vport_access_method(ibdev)) { 912 case MLX5_VPORT_ACCESS_METHOD_MAD: 913 return mlx5_query_mad_ifc_gids(ibdev, port, index, gid); 914 915 case MLX5_VPORT_ACCESS_METHOD_HCA: 916 return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid); 917 918 default: 919 return -EINVAL; 920 } 921 922 } 923 924 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 925 u16 *pkey) 926 { 927 struct mlx5_ib_dev *dev = to_mdev(ibdev); 928 struct mlx5_core_dev *mdev = dev->mdev; 929 930 switch (mlx5_get_vport_access_method(ibdev)) { 931 case MLX5_VPORT_ACCESS_METHOD_MAD: 932 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey); 933 934 case MLX5_VPORT_ACCESS_METHOD_HCA: 935 case MLX5_VPORT_ACCESS_METHOD_NIC: 936 return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index, 937 pkey); 938 default: 939 return -EINVAL; 940 } 941 
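	/* Note: the query helpers above all dispatch on
	 * mlx5_get_vport_access_method():
	 *   MAD - IB link layer without ib_virt; go through the subnet
	 *         management interface (mlx5_query_mad_ifc_*).
	 *   HCA - IB link layer with ib_virt; use HCA vport context
	 *         commands.
	 *   NIC - Ethernet (RoCE) link layer; use NIC vport context
	 *         commands.
	 */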
}

static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
				 struct ib_device_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_reg_node_desc in;
	struct mlx5_reg_node_desc out;
	int err;

	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
		return -EOPNOTSUPP;

	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
		return 0;

	/*
	 * If possible, pass node desc to FW, so it can generate
	 * a 144 trap. If cmd fails, just ignore.
	 */
	memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
	if (err)
		return err;

	memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);

	return err;
}

static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
			       struct ib_port_modify *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct ib_port_attr attr;
	u32 tmp;
	int err;

	mutex_lock(&dev->cap_mask_mutex);

	err = mlx5_ib_query_port(ibdev, port, &attr);
	if (err)
		goto out;

	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
		~props->clr_port_cap_mask;

	err = mlx5_set_port_caps(dev->mdev, port, tmp);

out:
	mutex_unlock(&dev->cap_mask_mutex);
	return err;
}

static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
{
	mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
		    caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
}

static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
			     struct mlx5_ib_alloc_ucontext_req_v2 *req,
			     u32 *num_sys_pages)
{
	int uars_per_sys_page;
	int bfregs_per_sys_page;
	int ref_bfregs = req->total_num_bfregs;

	if (req->total_num_bfregs == 0)
		return -EINVAL;

	BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
	BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);

	if (req->total_num_bfregs > MLX5_MAX_BFREGS)
		return -ENOMEM;

	uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
	bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
	req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
	*num_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;

	if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
		return -EINVAL;

	mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, using %d sys pages\n",
		    MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
		    lib_uar_4k ?
"yes" : "no", ref_bfregs, 1030 req->total_num_bfregs, *num_sys_pages); 1031 1032 return 0; 1033 } 1034 1035 static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) 1036 { 1037 struct mlx5_bfreg_info *bfregi; 1038 int err; 1039 int i; 1040 1041 bfregi = &context->bfregi; 1042 for (i = 0; i < bfregi->num_sys_pages; i++) { 1043 err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]); 1044 if (err) 1045 goto error; 1046 1047 mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]); 1048 } 1049 return 0; 1050 1051 error: 1052 for (--i; i >= 0; i--) 1053 if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i])) 1054 mlx5_ib_warn(dev, "failed to free uar %d\n", i); 1055 1056 return err; 1057 } 1058 1059 static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context) 1060 { 1061 struct mlx5_bfreg_info *bfregi; 1062 int err; 1063 int i; 1064 1065 bfregi = &context->bfregi; 1066 for (i = 0; i < bfregi->num_sys_pages; i++) { 1067 err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]); 1068 if (err) { 1069 mlx5_ib_warn(dev, "failed to free uar %d\n", i); 1070 return err; 1071 } 1072 } 1073 return 0; 1074 } 1075 1076 static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, 1077 struct ib_udata *udata) 1078 { 1079 struct mlx5_ib_dev *dev = to_mdev(ibdev); 1080 struct mlx5_ib_alloc_ucontext_req_v2 req = {}; 1081 struct mlx5_ib_alloc_ucontext_resp resp = {}; 1082 struct mlx5_ib_ucontext *context; 1083 struct mlx5_bfreg_info *bfregi; 1084 int ver; 1085 int err; 1086 size_t reqlen; 1087 size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2, 1088 max_cqe_version); 1089 bool lib_uar_4k; 1090 1091 if (!dev->ib_active) 1092 return ERR_PTR(-EAGAIN); 1093 1094 if (udata->inlen < sizeof(struct ib_uverbs_cmd_hdr)) 1095 return ERR_PTR(-EINVAL); 1096 1097 reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr); 1098 if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req)) 1099 ver = 0; 1100 else if (reqlen >= min_req_v2) 1101 ver = 2; 1102 else 1103 return ERR_PTR(-EINVAL); 1104 1105 err = ib_copy_from_udata(&req, udata, min(reqlen, sizeof(req))); 1106 if (err) 1107 return ERR_PTR(err); 1108 1109 if (req.flags) 1110 return ERR_PTR(-EINVAL); 1111 1112 if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2) 1113 return ERR_PTR(-EOPNOTSUPP); 1114 1115 req.total_num_bfregs = ALIGN(req.total_num_bfregs, 1116 MLX5_NON_FP_BFREGS_PER_UAR); 1117 if (req.num_low_latency_bfregs > req.total_num_bfregs - 1) 1118 return ERR_PTR(-EINVAL); 1119 1120 resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); 1121 if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf)) 1122 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); 1123 resp.cache_line_size = cache_line_size(); 1124 resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq); 1125 resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq); 1126 resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1127 resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz); 1128 resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz); 1129 resp.cqe_version = min_t(__u8, 1130 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version), 1131 req.max_cqe_version); 1132 resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 1133 MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT; 1134 resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? 
1135 MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1; 1136 resp.response_length = min(offsetof(typeof(resp), response_length) + 1137 sizeof(resp.response_length), udata->outlen); 1138 1139 context = kzalloc(sizeof(*context), GFP_KERNEL); 1140 if (!context) 1141 return ERR_PTR(-ENOMEM); 1142 1143 lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR; 1144 bfregi = &context->bfregi; 1145 1146 /* updates req->total_num_bfregs */ 1147 err = calc_total_bfregs(dev, lib_uar_4k, &req, &bfregi->num_sys_pages); 1148 if (err) 1149 goto out_ctx; 1150 1151 mutex_init(&bfregi->lock); 1152 bfregi->lib_uar_4k = lib_uar_4k; 1153 bfregi->count = kcalloc(req.total_num_bfregs, sizeof(*bfregi->count), 1154 GFP_KERNEL); 1155 if (!bfregi->count) { 1156 err = -ENOMEM; 1157 goto out_ctx; 1158 } 1159 1160 bfregi->sys_pages = kcalloc(bfregi->num_sys_pages, 1161 sizeof(*bfregi->sys_pages), 1162 GFP_KERNEL); 1163 if (!bfregi->sys_pages) { 1164 err = -ENOMEM; 1165 goto out_count; 1166 } 1167 1168 err = allocate_uars(dev, context); 1169 if (err) 1170 goto out_sys_pages; 1171 1172 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 1173 context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range; 1174 #endif 1175 1176 context->upd_xlt_page = __get_free_page(GFP_KERNEL); 1177 if (!context->upd_xlt_page) { 1178 err = -ENOMEM; 1179 goto out_uars; 1180 } 1181 mutex_init(&context->upd_xlt_page_mutex); 1182 1183 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) { 1184 err = mlx5_core_alloc_transport_domain(dev->mdev, 1185 &context->tdn); 1186 if (err) 1187 goto out_page; 1188 } 1189 1190 INIT_LIST_HEAD(&context->vma_private_list); 1191 INIT_LIST_HEAD(&context->db_page_list); 1192 mutex_init(&context->db_page_mutex); 1193 1194 resp.tot_bfregs = req.total_num_bfregs; 1195 resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports); 1196 1197 if (field_avail(typeof(resp), cqe_version, udata->outlen)) 1198 resp.response_length += sizeof(resp.cqe_version); 1199 1200 if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { 1201 resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | 1202 MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; 1203 resp.response_length += sizeof(resp.cmds_supp_uhw); 1204 } 1205 1206 if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) { 1207 if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) { 1208 mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline); 1209 resp.eth_min_inline++; 1210 } 1211 resp.response_length += sizeof(resp.eth_min_inline); 1212 } 1213 1214 /* 1215 * We don't want to expose information from the PCI bar that is located 1216 * after 4096 bytes, so if the arch only supports larger pages, let's 1217 * pretend we don't support reading the HCA's core clock. This is also 1218 * forced by mmap function. 
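	 * For example, an arch built with 64KB pages would otherwise expose
	 * the whole page around the timer registers, which is why the clock
	 * offset is only reported when PAGE_SIZE <= 4096 below.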
1219 */ 1220 if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) { 1221 if (PAGE_SIZE <= 4096) { 1222 resp.comp_mask |= 1223 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET; 1224 resp.hca_core_clock_offset = 1225 offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE; 1226 } 1227 resp.response_length += sizeof(resp.hca_core_clock_offset) + 1228 sizeof(resp.reserved2); 1229 } 1230 1231 if (field_avail(typeof(resp), log_uar_size, udata->outlen)) 1232 resp.response_length += sizeof(resp.log_uar_size); 1233 1234 if (field_avail(typeof(resp), num_uars_per_page, udata->outlen)) 1235 resp.response_length += sizeof(resp.num_uars_per_page); 1236 1237 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1238 if (err) 1239 goto out_td; 1240 1241 bfregi->ver = ver; 1242 bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs; 1243 context->cqe_version = resp.cqe_version; 1244 context->lib_caps = req.lib_caps; 1245 print_lib_caps(dev, context->lib_caps); 1246 1247 return &context->ibucontext; 1248 1249 out_td: 1250 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1251 mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); 1252 1253 out_page: 1254 free_page(context->upd_xlt_page); 1255 1256 out_uars: 1257 deallocate_uars(dev, context); 1258 1259 out_sys_pages: 1260 kfree(bfregi->sys_pages); 1261 1262 out_count: 1263 kfree(bfregi->count); 1264 1265 out_ctx: 1266 kfree(context); 1267 1268 return ERR_PTR(err); 1269 } 1270 1271 static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) 1272 { 1273 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1274 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1275 struct mlx5_bfreg_info *bfregi; 1276 1277 bfregi = &context->bfregi; 1278 if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) 1279 mlx5_core_dealloc_transport_domain(dev->mdev, context->tdn); 1280 1281 free_page(context->upd_xlt_page); 1282 deallocate_uars(dev, context); 1283 kfree(bfregi->sys_pages); 1284 kfree(bfregi->count); 1285 kfree(context); 1286 1287 return 0; 1288 } 1289 1290 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, 1291 struct mlx5_bfreg_info *bfregi, 1292 int idx) 1293 { 1294 int fw_uars_per_page; 1295 1296 fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1; 1297 1298 return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + 1299 bfregi->sys_pages[idx] / fw_uars_per_page; 1300 } 1301 1302 static int get_command(unsigned long offset) 1303 { 1304 return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK; 1305 } 1306 1307 static int get_arg(unsigned long offset) 1308 { 1309 return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1); 1310 } 1311 1312 static int get_index(unsigned long offset) 1313 { 1314 return get_arg(offset); 1315 } 1316 1317 static void mlx5_ib_vma_open(struct vm_area_struct *area) 1318 { 1319 /* vma_open is called when a new VMA is created on top of our VMA. This 1320 * is done through either mremap flow or split_vma (usually due to 1321 * mlock, madvise, munmap, etc.) We do not support a clone of the VMA, 1322 * as this VMA is strongly hardware related. Therefore we set the 1323 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from 1324 * calling us again and trying to do incorrect actions. We assume that 1325 * the original VMA size is exactly a single page, and therefore all 1326 * "splitting" operation will not happen to it. 
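	 * If a clone does appear (e.g. through mremap()), clearing vm_ops
	 * means it never calls back into this driver, and only the original
	 * VMA keeps the private-data bookkeeping used by mlx5_ib_vma_close()
	 * and mlx5_ib_disassociate_ucontext().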
1327 */ 1328 area->vm_ops = NULL; 1329 } 1330 1331 static void mlx5_ib_vma_close(struct vm_area_struct *area) 1332 { 1333 struct mlx5_ib_vma_private_data *mlx5_ib_vma_priv_data; 1334 1335 /* It's guaranteed that all VMAs opened on a FD are closed before the 1336 * file itself is closed, therefore no sync is needed with the regular 1337 * closing flow. (e.g. mlx5 ib_dealloc_ucontext) 1338 * However need a sync with accessing the vma as part of 1339 * mlx5_ib_disassociate_ucontext. 1340 * The close operation is usually called under mm->mmap_sem except when 1341 * process is exiting. 1342 * The exiting case is handled explicitly as part of 1343 * mlx5_ib_disassociate_ucontext. 1344 */ 1345 mlx5_ib_vma_priv_data = (struct mlx5_ib_vma_private_data *)area->vm_private_data; 1346 1347 /* setting the vma context pointer to null in the mlx5_ib driver's 1348 * private data, to protect a race condition in 1349 * mlx5_ib_disassociate_ucontext(). 1350 */ 1351 mlx5_ib_vma_priv_data->vma = NULL; 1352 list_del(&mlx5_ib_vma_priv_data->list); 1353 kfree(mlx5_ib_vma_priv_data); 1354 } 1355 1356 static const struct vm_operations_struct mlx5_ib_vm_ops = { 1357 .open = mlx5_ib_vma_open, 1358 .close = mlx5_ib_vma_close 1359 }; 1360 1361 static int mlx5_ib_set_vma_data(struct vm_area_struct *vma, 1362 struct mlx5_ib_ucontext *ctx) 1363 { 1364 struct mlx5_ib_vma_private_data *vma_prv; 1365 struct list_head *vma_head = &ctx->vma_private_list; 1366 1367 vma_prv = kzalloc(sizeof(*vma_prv), GFP_KERNEL); 1368 if (!vma_prv) 1369 return -ENOMEM; 1370 1371 vma_prv->vma = vma; 1372 vma->vm_private_data = vma_prv; 1373 vma->vm_ops = &mlx5_ib_vm_ops; 1374 1375 list_add(&vma_prv->list, vma_head); 1376 1377 return 0; 1378 } 1379 1380 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) 1381 { 1382 int ret; 1383 struct vm_area_struct *vma; 1384 struct mlx5_ib_vma_private_data *vma_private, *n; 1385 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1386 struct task_struct *owning_process = NULL; 1387 struct mm_struct *owning_mm = NULL; 1388 1389 owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID); 1390 if (!owning_process) 1391 return; 1392 1393 owning_mm = get_task_mm(owning_process); 1394 if (!owning_mm) { 1395 pr_info("no mm, disassociate ucontext is pending task termination\n"); 1396 while (1) { 1397 put_task_struct(owning_process); 1398 usleep_range(1000, 2000); 1399 owning_process = get_pid_task(ibcontext->tgid, 1400 PIDTYPE_PID); 1401 if (!owning_process || 1402 owning_process->state == TASK_DEAD) { 1403 pr_info("disassociate ucontext done, task was terminated\n"); 1404 /* in case task was dead need to release the 1405 * task struct. 1406 */ 1407 if (owning_process) 1408 put_task_struct(owning_process); 1409 return; 1410 } 1411 } 1412 } 1413 1414 /* need to protect from a race on closing the vma as part of 1415 * mlx5_ib_vma_close. 1416 */ 1417 down_read(&owning_mm->mmap_sem); 1418 list_for_each_entry_safe(vma_private, n, &context->vma_private_list, 1419 list) { 1420 vma = vma_private->vma; 1421 ret = zap_vma_ptes(vma, vma->vm_start, 1422 PAGE_SIZE); 1423 WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__); 1424 /* context going to be destroyed, should 1425 * not access ops any more. 
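		 * Any later userspace access to this mapping faults on the
		 * zapped PTEs rather than touching hardware that is being
		 * torn down.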
1426 */ 1427 vma->vm_ops = NULL; 1428 list_del(&vma_private->list); 1429 kfree(vma_private); 1430 } 1431 up_read(&owning_mm->mmap_sem); 1432 mmput(owning_mm); 1433 put_task_struct(owning_process); 1434 } 1435 1436 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd) 1437 { 1438 switch (cmd) { 1439 case MLX5_IB_MMAP_WC_PAGE: 1440 return "WC"; 1441 case MLX5_IB_MMAP_REGULAR_PAGE: 1442 return "best effort WC"; 1443 case MLX5_IB_MMAP_NC_PAGE: 1444 return "NC"; 1445 default: 1446 return NULL; 1447 } 1448 } 1449 1450 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd, 1451 struct vm_area_struct *vma, 1452 struct mlx5_ib_ucontext *context) 1453 { 1454 struct mlx5_bfreg_info *bfregi = &context->bfregi; 1455 int err; 1456 unsigned long idx; 1457 phys_addr_t pfn, pa; 1458 pgprot_t prot; 1459 int uars_per_page; 1460 1461 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1462 return -EINVAL; 1463 1464 uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k); 1465 idx = get_index(vma->vm_pgoff); 1466 if (idx % uars_per_page || 1467 idx * uars_per_page >= bfregi->num_sys_pages) { 1468 mlx5_ib_warn(dev, "invalid uar index %lu\n", idx); 1469 return -EINVAL; 1470 } 1471 1472 switch (cmd) { 1473 case MLX5_IB_MMAP_WC_PAGE: 1474 /* Some architectures don't support WC memory */ 1475 #if defined(CONFIG_X86) 1476 if (!pat_enabled()) 1477 return -EPERM; 1478 #elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU))) 1479 return -EPERM; 1480 #endif 1481 /* fall through */ 1482 case MLX5_IB_MMAP_REGULAR_PAGE: 1483 /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */ 1484 prot = pgprot_writecombine(vma->vm_page_prot); 1485 break; 1486 case MLX5_IB_MMAP_NC_PAGE: 1487 prot = pgprot_noncached(vma->vm_page_prot); 1488 break; 1489 default: 1490 return -EINVAL; 1491 } 1492 1493 pfn = uar_index2pfn(dev, bfregi, idx); 1494 mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn); 1495 1496 vma->vm_page_prot = prot; 1497 err = io_remap_pfn_range(vma, vma->vm_start, pfn, 1498 PAGE_SIZE, vma->vm_page_prot); 1499 if (err) { 1500 mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n", 1501 err, vma->vm_start, &pfn, mmap_cmd2str(cmd)); 1502 return -EAGAIN; 1503 } 1504 1505 pa = pfn << PAGE_SHIFT; 1506 mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd), 1507 vma->vm_start, &pa); 1508 1509 return mlx5_ib_set_vma_data(vma, context); 1510 } 1511 1512 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) 1513 { 1514 struct mlx5_ib_ucontext *context = to_mucontext(ibcontext); 1515 struct mlx5_ib_dev *dev = to_mdev(ibcontext->device); 1516 unsigned long command; 1517 phys_addr_t pfn; 1518 1519 command = get_command(vma->vm_pgoff); 1520 switch (command) { 1521 case MLX5_IB_MMAP_WC_PAGE: 1522 case MLX5_IB_MMAP_NC_PAGE: 1523 case MLX5_IB_MMAP_REGULAR_PAGE: 1524 return uar_mmap(dev, command, vma, context); 1525 1526 case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES: 1527 return -ENOSYS; 1528 1529 case MLX5_IB_MMAP_CORE_CLOCK: 1530 if (vma->vm_end - vma->vm_start != PAGE_SIZE) 1531 return -EINVAL; 1532 1533 if (vma->vm_flags & VM_WRITE) 1534 return -EPERM; 1535 1536 /* Don't expose to user-space information it shouldn't have */ 1537 if (PAGE_SIZE > 4096) 1538 return -EOPNOTSUPP; 1539 1540 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 1541 pfn = (dev->mdev->iseg_base + 1542 offsetof(struct mlx5_init_seg, internal_timer_h)) >> 1543 PAGE_SHIFT; 1544 if (io_remap_pfn_range(vma, vma->vm_start, pfn, 
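			       /* map one read-only, non-cached page holding
				* the internal timer registers
				*/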
1545 PAGE_SIZE, vma->vm_page_prot)) 1546 return -EAGAIN; 1547 1548 mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n", 1549 vma->vm_start, 1550 (unsigned long long)pfn << PAGE_SHIFT); 1551 break; 1552 1553 default: 1554 return -EINVAL; 1555 } 1556 1557 return 0; 1558 } 1559 1560 static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev, 1561 struct ib_ucontext *context, 1562 struct ib_udata *udata) 1563 { 1564 struct mlx5_ib_alloc_pd_resp resp; 1565 struct mlx5_ib_pd *pd; 1566 int err; 1567 1568 pd = kmalloc(sizeof(*pd), GFP_KERNEL); 1569 if (!pd) 1570 return ERR_PTR(-ENOMEM); 1571 1572 err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn); 1573 if (err) { 1574 kfree(pd); 1575 return ERR_PTR(err); 1576 } 1577 1578 if (context) { 1579 resp.pdn = pd->pdn; 1580 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) { 1581 mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn); 1582 kfree(pd); 1583 return ERR_PTR(-EFAULT); 1584 } 1585 } 1586 1587 return &pd->ibpd; 1588 } 1589 1590 static int mlx5_ib_dealloc_pd(struct ib_pd *pd) 1591 { 1592 struct mlx5_ib_dev *mdev = to_mdev(pd->device); 1593 struct mlx5_ib_pd *mpd = to_mpd(pd); 1594 1595 mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn); 1596 kfree(mpd); 1597 1598 return 0; 1599 } 1600 1601 enum { 1602 MATCH_CRITERIA_ENABLE_OUTER_BIT, 1603 MATCH_CRITERIA_ENABLE_MISC_BIT, 1604 MATCH_CRITERIA_ENABLE_INNER_BIT 1605 }; 1606 1607 #define HEADER_IS_ZERO(match_criteria, headers) \ 1608 !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ 1609 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ 1610 1611 static u8 get_match_criteria_enable(u32 *match_criteria) 1612 { 1613 u8 match_criteria_enable; 1614 1615 match_criteria_enable = 1616 (!HEADER_IS_ZERO(match_criteria, outer_headers)) << 1617 MATCH_CRITERIA_ENABLE_OUTER_BIT; 1618 match_criteria_enable |= 1619 (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << 1620 MATCH_CRITERIA_ENABLE_MISC_BIT; 1621 match_criteria_enable |= 1622 (!HEADER_IS_ZERO(match_criteria, inner_headers)) << 1623 MATCH_CRITERIA_ENABLE_INNER_BIT; 1624 1625 return match_criteria_enable; 1626 } 1627 1628 static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) 1629 { 1630 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); 1631 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); 1632 } 1633 1634 static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, 1635 bool inner) 1636 { 1637 if (inner) { 1638 MLX5_SET(fte_match_set_misc, 1639 misc_c, inner_ipv6_flow_label, mask); 1640 MLX5_SET(fte_match_set_misc, 1641 misc_v, inner_ipv6_flow_label, val); 1642 } else { 1643 MLX5_SET(fte_match_set_misc, 1644 misc_c, outer_ipv6_flow_label, mask); 1645 MLX5_SET(fte_match_set_misc, 1646 misc_v, outer_ipv6_flow_label, val); 1647 } 1648 } 1649 1650 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) 1651 { 1652 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); 1653 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); 1654 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); 1655 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); 1656 } 1657 1658 #define LAST_ETH_FIELD vlan_tag 1659 #define LAST_IB_FIELD sl 1660 #define LAST_IPV4_FIELD tos 1661 #define LAST_IPV6_FIELD traffic_class 1662 #define LAST_TCP_UDP_FIELD src_port 1663 #define LAST_TUNNEL_FIELD tunnel_id 1664 1665 /* Field is the last supported field */ 1666 #define FIELDS_NOT_SUPPORTED(filter, field)\ 1667 memchr_inv((void *)&filter.field +\ 1668 sizeof(filter.field), 0,\ 1669 
sizeof(filter) -\ 1670 offsetof(typeof(filter), field) -\ 1671 sizeof(filter.field)) 1672 1673 static int parse_flow_attr(u32 *match_c, u32 *match_v, 1674 const union ib_flow_spec *ib_spec) 1675 { 1676 void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, 1677 misc_parameters); 1678 void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, 1679 misc_parameters); 1680 void *headers_c; 1681 void *headers_v; 1682 1683 if (ib_spec->type & IB_FLOW_SPEC_INNER) { 1684 headers_c = MLX5_ADDR_OF(fte_match_param, match_c, 1685 inner_headers); 1686 headers_v = MLX5_ADDR_OF(fte_match_param, match_v, 1687 inner_headers); 1688 } else { 1689 headers_c = MLX5_ADDR_OF(fte_match_param, match_c, 1690 outer_headers); 1691 headers_v = MLX5_ADDR_OF(fte_match_param, match_v, 1692 outer_headers); 1693 } 1694 1695 switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { 1696 case IB_FLOW_SPEC_ETH: 1697 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) 1698 return -ENOTSUPP; 1699 1700 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 1701 dmac_47_16), 1702 ib_spec->eth.mask.dst_mac); 1703 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 1704 dmac_47_16), 1705 ib_spec->eth.val.dst_mac); 1706 1707 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 1708 smac_47_16), 1709 ib_spec->eth.mask.src_mac); 1710 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 1711 smac_47_16), 1712 ib_spec->eth.val.src_mac); 1713 1714 if (ib_spec->eth.mask.vlan_tag) { 1715 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1716 cvlan_tag, 1); 1717 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 1718 cvlan_tag, 1); 1719 1720 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1721 first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); 1722 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 1723 first_vid, ntohs(ib_spec->eth.val.vlan_tag)); 1724 1725 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1726 first_cfi, 1727 ntohs(ib_spec->eth.mask.vlan_tag) >> 12); 1728 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 1729 first_cfi, 1730 ntohs(ib_spec->eth.val.vlan_tag) >> 12); 1731 1732 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1733 first_prio, 1734 ntohs(ib_spec->eth.mask.vlan_tag) >> 13); 1735 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 1736 first_prio, 1737 ntohs(ib_spec->eth.val.vlan_tag) >> 13); 1738 } 1739 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1740 ethertype, ntohs(ib_spec->eth.mask.ether_type)); 1741 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 1742 ethertype, ntohs(ib_spec->eth.val.ether_type)); 1743 break; 1744 case IB_FLOW_SPEC_IPV4: 1745 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) 1746 return -ENOTSUPP; 1747 1748 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1749 ethertype, 0xffff); 1750 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 1751 ethertype, ETH_P_IP); 1752 1753 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 1754 src_ipv4_src_ipv6.ipv4_layout.ipv4), 1755 &ib_spec->ipv4.mask.src_ip, 1756 sizeof(ib_spec->ipv4.mask.src_ip)); 1757 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 1758 src_ipv4_src_ipv6.ipv4_layout.ipv4), 1759 &ib_spec->ipv4.val.src_ip, 1760 sizeof(ib_spec->ipv4.val.src_ip)); 1761 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 1762 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 1763 &ib_spec->ipv4.mask.dst_ip, 1764 sizeof(ib_spec->ipv4.mask.dst_ip)); 1765 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 1766 dst_ipv4_dst_ipv6.ipv4_layout.ipv4), 1767 &ib_spec->ipv4.val.dst_ip, 1768 sizeof(ib_spec->ipv4.val.dst_ip)); 1769 1770 set_tos(headers_c, headers_v, 1771 
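		/* set_tos() splits the ToS byte: the low two bits land in
		 * ip_ecn and the upper six bits (>> 2) in ip_dscp.
		 */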
ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); 1772 1773 set_proto(headers_c, headers_v, 1774 ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); 1775 break; 1776 case IB_FLOW_SPEC_IPV6: 1777 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) 1778 return -ENOTSUPP; 1779 1780 MLX5_SET(fte_match_set_lyr_2_4, headers_c, 1781 ethertype, 0xffff); 1782 MLX5_SET(fte_match_set_lyr_2_4, headers_v, 1783 ethertype, ETH_P_IPV6); 1784 1785 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 1786 src_ipv4_src_ipv6.ipv6_layout.ipv6), 1787 &ib_spec->ipv6.mask.src_ip, 1788 sizeof(ib_spec->ipv6.mask.src_ip)); 1789 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 1790 src_ipv4_src_ipv6.ipv6_layout.ipv6), 1791 &ib_spec->ipv6.val.src_ip, 1792 sizeof(ib_spec->ipv6.val.src_ip)); 1793 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, 1794 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 1795 &ib_spec->ipv6.mask.dst_ip, 1796 sizeof(ib_spec->ipv6.mask.dst_ip)); 1797 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, 1798 dst_ipv4_dst_ipv6.ipv6_layout.ipv6), 1799 &ib_spec->ipv6.val.dst_ip, 1800 sizeof(ib_spec->ipv6.val.dst_ip)); 1801 1802 set_tos(headers_c, headers_v, 1803 ib_spec->ipv6.mask.traffic_class, 1804 ib_spec->ipv6.val.traffic_class); 1805 1806 set_proto(headers_c, headers_v, 1807 ib_spec->ipv6.mask.next_hdr, 1808 ib_spec->ipv6.val.next_hdr); 1809 1810 set_flow_label(misc_params_c, misc_params_v, 1811 ntohl(ib_spec->ipv6.mask.flow_label), 1812 ntohl(ib_spec->ipv6.val.flow_label), 1813 ib_spec->type & IB_FLOW_SPEC_INNER); 1814 1815 break; 1816 case IB_FLOW_SPEC_TCP: 1817 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 1818 LAST_TCP_UDP_FIELD)) 1819 return -ENOTSUPP; 1820 1821 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 1822 0xff); 1823 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 1824 IPPROTO_TCP); 1825 1826 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, 1827 ntohs(ib_spec->tcp_udp.mask.src_port)); 1828 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, 1829 ntohs(ib_spec->tcp_udp.val.src_port)); 1830 1831 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, 1832 ntohs(ib_spec->tcp_udp.mask.dst_port)); 1833 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, 1834 ntohs(ib_spec->tcp_udp.val.dst_port)); 1835 break; 1836 case IB_FLOW_SPEC_UDP: 1837 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, 1838 LAST_TCP_UDP_FIELD)) 1839 return -ENOTSUPP; 1840 1841 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 1842 0xff); 1843 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, 1844 IPPROTO_UDP); 1845 1846 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, 1847 ntohs(ib_spec->tcp_udp.mask.src_port)); 1848 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, 1849 ntohs(ib_spec->tcp_udp.val.src_port)); 1850 1851 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, 1852 ntohs(ib_spec->tcp_udp.mask.dst_port)); 1853 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, 1854 ntohs(ib_spec->tcp_udp.val.dst_port)); 1855 break; 1856 case IB_FLOW_SPEC_VXLAN_TUNNEL: 1857 if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, 1858 LAST_TUNNEL_FIELD)) 1859 return -ENOTSUPP; 1860 1861 MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, 1862 ntohl(ib_spec->tunnel.mask.tunnel_id)); 1863 MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, 1864 ntohl(ib_spec->tunnel.val.tunnel_id)); 1865 break; 1866 default: 1867 return -EINVAL; 1868 } 1869 1870 return 0; 1871 } 1872 1873 /* If a flow could catch both multicast and unicast packets, 1874 * it won't fall into the multicast 
flow steering table and this rule 1875 * could steal other multicast packets. 1876 */ 1877 static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) 1878 { 1879 struct ib_flow_spec_eth *eth_spec; 1880 1881 if (ib_attr->type != IB_FLOW_ATTR_NORMAL || 1882 ib_attr->size < sizeof(struct ib_flow_attr) + 1883 sizeof(struct ib_flow_spec_eth) || 1884 ib_attr->num_of_specs < 1) 1885 return false; 1886 1887 eth_spec = (struct ib_flow_spec_eth *)(ib_attr + 1); 1888 if (eth_spec->type != IB_FLOW_SPEC_ETH || 1889 eth_spec->size != sizeof(*eth_spec)) 1890 return false; 1891 1892 return is_multicast_ether_addr(eth_spec->mask.dst_mac) && 1893 is_multicast_ether_addr(eth_spec->val.dst_mac); 1894 } 1895 1896 static bool is_valid_attr(const struct ib_flow_attr *flow_attr) 1897 { 1898 union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); 1899 bool has_ipv4_spec = false; 1900 bool eth_type_ipv4 = true; 1901 unsigned int spec_index; 1902 1903 /* Validate that ethertype is correct */ 1904 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 1905 if (ib_spec->type == IB_FLOW_SPEC_ETH && 1906 ib_spec->eth.mask.ether_type) { 1907 if (!((ib_spec->eth.mask.ether_type == htons(0xffff)) && 1908 ib_spec->eth.val.ether_type == htons(ETH_P_IP))) 1909 eth_type_ipv4 = false; 1910 } else if (ib_spec->type == IB_FLOW_SPEC_IPV4) { 1911 has_ipv4_spec = true; 1912 } 1913 ib_spec = (void *)ib_spec + ib_spec->size; 1914 } 1915 return !has_ipv4_spec || eth_type_ipv4; 1916 } 1917 1918 static void put_flow_table(struct mlx5_ib_dev *dev, 1919 struct mlx5_ib_flow_prio *prio, bool ft_added) 1920 { 1921 prio->refcount -= !!ft_added; 1922 if (!prio->refcount) { 1923 mlx5_destroy_flow_table(prio->flow_table); 1924 prio->flow_table = NULL; 1925 } 1926 } 1927 1928 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) 1929 { 1930 struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device); 1931 struct mlx5_ib_flow_handler *handler = container_of(flow_id, 1932 struct mlx5_ib_flow_handler, 1933 ibflow); 1934 struct mlx5_ib_flow_handler *iter, *tmp; 1935 1936 mutex_lock(&dev->flow_db.lock); 1937 1938 list_for_each_entry_safe(iter, tmp, &handler->list, list) { 1939 mlx5_del_flow_rules(iter->rule); 1940 put_flow_table(dev, iter->prio, true); 1941 list_del(&iter->list); 1942 kfree(iter); 1943 } 1944 1945 mlx5_del_flow_rules(handler->rule); 1946 put_flow_table(dev, handler->prio, true); 1947 mutex_unlock(&dev->flow_db.lock); 1948 1949 kfree(handler); 1950 1951 return 0; 1952 } 1953 1954 static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) 1955 { 1956 priority *= 2; 1957 if (!dont_trap) 1958 priority++; 1959 return priority; 1960 } 1961 1962 enum flow_table_type { 1963 MLX5_IB_FT_RX, 1964 MLX5_IB_FT_TX 1965 }; 1966 1967 #define MLX5_FS_MAX_TYPES 10 1968 #define MLX5_FS_MAX_ENTRIES 32000UL 1969 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, 1970 struct ib_flow_attr *flow_attr, 1971 enum flow_table_type ft_type) 1972 { 1973 bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; 1974 struct mlx5_flow_namespace *ns = NULL; 1975 struct mlx5_ib_flow_prio *prio; 1976 struct mlx5_flow_table *ft; 1977 int num_entries; 1978 int num_groups; 1979 int priority; 1980 int err = 0; 1981 1982 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 1983 if (flow_is_multicast_only(flow_attr) && 1984 !dont_trap) 1985 priority = MLX5_IB_FLOW_MCAST_PRIO; 1986 else 1987 priority = ib_prio_to_core_prio(flow_attr->priority, 1988 dont_trap); 1989 ns = mlx5_get_flow_namespace(dev->mdev, 
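/*
 * Worked example for ib_prio_to_core_prio() just above (illustration only):
 * each user-visible priority gets two consecutive core levels, the even one
 * reserved for IB_FLOW_ATTR_FLAGS_DONT_TRAP rules so they are evaluated
 * ahead of the normal rule of the same priority:
 *
 *	ib_prio_to_core_prio(0, true)  == 0
 *	ib_prio_to_core_prio(0, false) == 1
 *	ib_prio_to_core_prio(3, false) == 7
 *
 * Multicast-only NORMAL rules skip this mapping and use
 * MLX5_IB_FLOW_MCAST_PRIO instead, as handled a few lines above.
 */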
1990 MLX5_FLOW_NAMESPACE_BYPASS); 1991 num_entries = MLX5_FS_MAX_ENTRIES; 1992 num_groups = MLX5_FS_MAX_TYPES; 1993 prio = &dev->flow_db.prios[priority]; 1994 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 1995 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 1996 ns = mlx5_get_flow_namespace(dev->mdev, 1997 MLX5_FLOW_NAMESPACE_LEFTOVERS); 1998 build_leftovers_ft_param(&priority, 1999 &num_entries, 2000 &num_groups); 2001 prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; 2002 } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 2003 if (!MLX5_CAP_FLOWTABLE(dev->mdev, 2004 allow_sniffer_and_nic_rx_shared_tir)) 2005 return ERR_PTR(-ENOTSUPP); 2006 2007 ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? 2008 MLX5_FLOW_NAMESPACE_SNIFFER_RX : 2009 MLX5_FLOW_NAMESPACE_SNIFFER_TX); 2010 2011 prio = &dev->flow_db.sniffer[ft_type]; 2012 priority = 0; 2013 num_entries = 1; 2014 num_groups = 1; 2015 } 2016 2017 if (!ns) 2018 return ERR_PTR(-ENOTSUPP); 2019 2020 ft = prio->flow_table; 2021 if (!ft) { 2022 ft = mlx5_create_auto_grouped_flow_table(ns, priority, 2023 num_entries, 2024 num_groups, 2025 0, 0); 2026 2027 if (!IS_ERR(ft)) { 2028 prio->refcount = 0; 2029 prio->flow_table = ft; 2030 } else { 2031 err = PTR_ERR(ft); 2032 } 2033 } 2034 2035 return err ? ERR_PTR(err) : prio; 2036 } 2037 2038 static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, 2039 struct mlx5_ib_flow_prio *ft_prio, 2040 const struct ib_flow_attr *flow_attr, 2041 struct mlx5_flow_destination *dst) 2042 { 2043 struct mlx5_flow_table *ft = ft_prio->flow_table; 2044 struct mlx5_ib_flow_handler *handler; 2045 struct mlx5_flow_act flow_act = {0}; 2046 struct mlx5_flow_spec *spec; 2047 const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); 2048 unsigned int spec_index; 2049 int err = 0; 2050 2051 if (!is_valid_attr(flow_attr)) 2052 return ERR_PTR(-EINVAL); 2053 2054 spec = mlx5_vzalloc(sizeof(*spec)); 2055 handler = kzalloc(sizeof(*handler), GFP_KERNEL); 2056 if (!handler || !spec) { 2057 err = -ENOMEM; 2058 goto free; 2059 } 2060 2061 INIT_LIST_HEAD(&handler->list); 2062 2063 for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) { 2064 err = parse_flow_attr(spec->match_criteria, 2065 spec->match_value, ib_flow); 2066 if (err < 0) 2067 goto free; 2068 2069 ib_flow += ((union ib_flow_spec *)ib_flow)->size; 2070 } 2071 2072 spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); 2073 flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : 2074 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; 2075 flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; 2076 handler->rule = mlx5_add_flow_rules(ft, spec, 2077 &flow_act, 2078 dst, 1); 2079 2080 if (IS_ERR(handler->rule)) { 2081 err = PTR_ERR(handler->rule); 2082 goto free; 2083 } 2084 2085 ft_prio->refcount++; 2086 handler->prio = ft_prio; 2087 2088 ft_prio->flow_table = ft; 2089 free: 2090 if (err) 2091 kfree(handler); 2092 kvfree(spec); 2093 return err ? 
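/*
 * Illustration of the memory layout walked by the loop in create_flow_rule()
 * above: the ib_flow_attr header is followed immediately by num_of_specs
 * variable-sized specs, so the cursor advances by each spec's .size:
 *
 *	+------------------+------------------+------------------+----
 *	| ib_flow_attr     | ib_flow_spec #0  | ib_flow_spec #1  | ...
 *	| num_of_specs = N | .size = sz0      | .size = sz1      |
 *	+------------------+------------------+------------------+----
 *
 *	ib_flow = (const void *)flow_attr + sizeof(*flow_attr);   first spec
 *	ib_flow += ((union ib_flow_spec *)ib_flow)->size;         next spec, N times
 *
 * Each iteration ORs more match bits into spec->match_criteria and
 * spec->match_value through parse_flow_attr().
 */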
ERR_PTR(err) : handler; 2094 } 2095 2096 static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev, 2097 struct mlx5_ib_flow_prio *ft_prio, 2098 struct ib_flow_attr *flow_attr, 2099 struct mlx5_flow_destination *dst) 2100 { 2101 struct mlx5_ib_flow_handler *handler_dst = NULL; 2102 struct mlx5_ib_flow_handler *handler = NULL; 2103 2104 handler = create_flow_rule(dev, ft_prio, flow_attr, NULL); 2105 if (!IS_ERR(handler)) { 2106 handler_dst = create_flow_rule(dev, ft_prio, 2107 flow_attr, dst); 2108 if (IS_ERR(handler_dst)) { 2109 mlx5_del_flow_rules(handler->rule); 2110 ft_prio->refcount--; 2111 kfree(handler); 2112 handler = handler_dst; 2113 } else { 2114 list_add(&handler_dst->list, &handler->list); 2115 } 2116 } 2117 2118 return handler; 2119 } 2120 enum { 2121 LEFTOVERS_MC, 2122 LEFTOVERS_UC, 2123 }; 2124 2125 static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev, 2126 struct mlx5_ib_flow_prio *ft_prio, 2127 struct ib_flow_attr *flow_attr, 2128 struct mlx5_flow_destination *dst) 2129 { 2130 struct mlx5_ib_flow_handler *handler_ucast = NULL; 2131 struct mlx5_ib_flow_handler *handler = NULL; 2132 2133 static struct { 2134 struct ib_flow_attr flow_attr; 2135 struct ib_flow_spec_eth eth_flow; 2136 } leftovers_specs[] = { 2137 [LEFTOVERS_MC] = { 2138 .flow_attr = { 2139 .num_of_specs = 1, 2140 .size = sizeof(leftovers_specs[0]) 2141 }, 2142 .eth_flow = { 2143 .type = IB_FLOW_SPEC_ETH, 2144 .size = sizeof(struct ib_flow_spec_eth), 2145 .mask = {.dst_mac = {0x1} }, 2146 .val = {.dst_mac = {0x1} } 2147 } 2148 }, 2149 [LEFTOVERS_UC] = { 2150 .flow_attr = { 2151 .num_of_specs = 1, 2152 .size = sizeof(leftovers_specs[0]) 2153 }, 2154 .eth_flow = { 2155 .type = IB_FLOW_SPEC_ETH, 2156 .size = sizeof(struct ib_flow_spec_eth), 2157 .mask = {.dst_mac = {0x1} }, 2158 .val = {.dst_mac = {} } 2159 } 2160 } 2161 }; 2162 2163 handler = create_flow_rule(dev, ft_prio, 2164 &leftovers_specs[LEFTOVERS_MC].flow_attr, 2165 dst); 2166 if (!IS_ERR(handler) && 2167 flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) { 2168 handler_ucast = create_flow_rule(dev, ft_prio, 2169 &leftovers_specs[LEFTOVERS_UC].flow_attr, 2170 dst); 2171 if (IS_ERR(handler_ucast)) { 2172 mlx5_del_flow_rules(handler->rule); 2173 ft_prio->refcount--; 2174 kfree(handler); 2175 handler = handler_ucast; 2176 } else { 2177 list_add(&handler_ucast->list, &handler->list); 2178 } 2179 } 2180 2181 return handler; 2182 } 2183 2184 static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, 2185 struct mlx5_ib_flow_prio *ft_rx, 2186 struct mlx5_ib_flow_prio *ft_tx, 2187 struct mlx5_flow_destination *dst) 2188 { 2189 struct mlx5_ib_flow_handler *handler_rx; 2190 struct mlx5_ib_flow_handler *handler_tx; 2191 int err; 2192 static const struct ib_flow_attr flow_attr = { 2193 .num_of_specs = 0, 2194 .size = sizeof(flow_attr) 2195 }; 2196 2197 handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); 2198 if (IS_ERR(handler_rx)) { 2199 err = PTR_ERR(handler_rx); 2200 goto err; 2201 } 2202 2203 handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); 2204 if (IS_ERR(handler_tx)) { 2205 err = PTR_ERR(handler_tx); 2206 goto err_tx; 2207 } 2208 2209 list_add(&handler_tx->list, &handler_rx->list); 2210 2211 return handler_rx; 2212 2213 err_tx: 2214 mlx5_del_flow_rules(handler_rx->rule); 2215 ft_rx->refcount--; 2216 kfree(handler_rx); 2217 err: 2218 return ERR_PTR(err); 2219 } 2220 2221 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, 2222 struct ib_flow_attr *flow_attr, 2223 int 
domain) 2224 { 2225 struct mlx5_ib_dev *dev = to_mdev(qp->device); 2226 struct mlx5_ib_qp *mqp = to_mqp(qp); 2227 struct mlx5_ib_flow_handler *handler = NULL; 2228 struct mlx5_flow_destination *dst = NULL; 2229 struct mlx5_ib_flow_prio *ft_prio_tx = NULL; 2230 struct mlx5_ib_flow_prio *ft_prio; 2231 int err; 2232 2233 if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) 2234 return ERR_PTR(-ENOSPC); 2235 2236 if (domain != IB_FLOW_DOMAIN_USER || 2237 flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) || 2238 (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP)) 2239 return ERR_PTR(-EINVAL); 2240 2241 dst = kzalloc(sizeof(*dst), GFP_KERNEL); 2242 if (!dst) 2243 return ERR_PTR(-ENOMEM); 2244 2245 mutex_lock(&dev->flow_db.lock); 2246 2247 ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); 2248 if (IS_ERR(ft_prio)) { 2249 err = PTR_ERR(ft_prio); 2250 goto unlock; 2251 } 2252 if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 2253 ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); 2254 if (IS_ERR(ft_prio_tx)) { 2255 err = PTR_ERR(ft_prio_tx); 2256 ft_prio_tx = NULL; 2257 goto destroy_ft; 2258 } 2259 } 2260 2261 dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; 2262 if (mqp->flags & MLX5_IB_QP_RSS) 2263 dst->tir_num = mqp->rss_qp.tirn; 2264 else 2265 dst->tir_num = mqp->raw_packet_qp.rq.tirn; 2266 2267 if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { 2268 if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { 2269 handler = create_dont_trap_rule(dev, ft_prio, 2270 flow_attr, dst); 2271 } else { 2272 handler = create_flow_rule(dev, ft_prio, flow_attr, 2273 dst); 2274 } 2275 } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || 2276 flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { 2277 handler = create_leftovers_rule(dev, ft_prio, flow_attr, 2278 dst); 2279 } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { 2280 handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); 2281 } else { 2282 err = -EINVAL; 2283 goto destroy_ft; 2284 } 2285 2286 if (IS_ERR(handler)) { 2287 err = PTR_ERR(handler); 2288 handler = NULL; 2289 goto destroy_ft; 2290 } 2291 2292 mutex_unlock(&dev->flow_db.lock); 2293 kfree(dst); 2294 2295 return &handler->ibflow; 2296 2297 destroy_ft: 2298 put_flow_table(dev, ft_prio, false); 2299 if (ft_prio_tx) 2300 put_flow_table(dev, ft_prio_tx, false); 2301 unlock: 2302 mutex_unlock(&dev->flow_db.lock); 2303 kfree(dst); 2304 kfree(handler); 2305 return ERR_PTR(err); 2306 } 2307 2308 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 2309 { 2310 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2311 int err; 2312 2313 err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num); 2314 if (err) 2315 mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n", 2316 ibqp->qp_num, gid->raw); 2317 2318 return err; 2319 } 2320 2321 static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 2322 { 2323 struct mlx5_ib_dev *dev = to_mdev(ibqp->device); 2324 int err; 2325 2326 err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num); 2327 if (err) 2328 mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n", 2329 ibqp->qp_num, gid->raw); 2330 2331 return err; 2332 } 2333 2334 static int init_node_data(struct mlx5_ib_dev *dev) 2335 { 2336 int err; 2337 2338 err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc); 2339 if (err) 2340 return err; 2341 2342 dev->mdev->rev_id = dev->mdev->pdev->revision; 2343 2344 return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid); 2345 } 2346 2347 static ssize_t show_fw_pages(struct device *device, struct 
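/*
 * Hedged usage sketch (not part of this driver): mlx5_ib_create_flow() above
 * is normally reached through the core verbs entry point, roughly along
 * these lines; all field values are illustrative:
 *
 *	struct {
 *		struct ib_flow_attr     attr;
 *		struct ib_flow_spec_eth eth;
 *	} rule = {
 *		.attr = {
 *			.type         = IB_FLOW_ATTR_NORMAL,
 *			.size         = sizeof(rule),
 *			.num_of_specs = 1,
 *			.port         = 1,
 *		},
 *		.eth = {
 *			.type = IB_FLOW_SPEC_ETH,
 *			.size = sizeof(struct ib_flow_spec_eth),
 *			.val  = { .dst_mac = { 0x01 } },
 *			.mask = { .dst_mac = { 0x01 } },
 *		},
 *	};
 *	struct ib_flow *flow = ib_create_flow(qp, &rule.attr, IB_FLOW_DOMAIN_USER);
 *
 * The destination TIR is then derived from the QP (RSS or RAW_PACKET), as in
 * the dst->tir_num assignment above.
 */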
device_attribute *attr, 2348 char *buf) 2349 { 2350 struct mlx5_ib_dev *dev = 2351 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 2352 2353 return sprintf(buf, "%d\n", dev->mdev->priv.fw_pages); 2354 } 2355 2356 static ssize_t show_reg_pages(struct device *device, 2357 struct device_attribute *attr, char *buf) 2358 { 2359 struct mlx5_ib_dev *dev = 2360 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 2361 2362 return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages)); 2363 } 2364 2365 static ssize_t show_hca(struct device *device, struct device_attribute *attr, 2366 char *buf) 2367 { 2368 struct mlx5_ib_dev *dev = 2369 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 2370 return sprintf(buf, "MT%d\n", dev->mdev->pdev->device); 2371 } 2372 2373 static ssize_t show_rev(struct device *device, struct device_attribute *attr, 2374 char *buf) 2375 { 2376 struct mlx5_ib_dev *dev = 2377 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 2378 return sprintf(buf, "%x\n", dev->mdev->rev_id); 2379 } 2380 2381 static ssize_t show_board(struct device *device, struct device_attribute *attr, 2382 char *buf) 2383 { 2384 struct mlx5_ib_dev *dev = 2385 container_of(device, struct mlx5_ib_dev, ib_dev.dev); 2386 return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN, 2387 dev->mdev->board_id); 2388 } 2389 2390 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 2391 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL); 2392 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 2393 static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL); 2394 static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL); 2395 2396 static struct device_attribute *mlx5_class_attributes[] = { 2397 &dev_attr_hw_rev, 2398 &dev_attr_hca_type, 2399 &dev_attr_board_id, 2400 &dev_attr_fw_pages, 2401 &dev_attr_reg_pages, 2402 }; 2403 2404 static void pkey_change_handler(struct work_struct *work) 2405 { 2406 struct mlx5_ib_port_resources *ports = 2407 container_of(work, struct mlx5_ib_port_resources, 2408 pkey_change_work); 2409 2410 mutex_lock(&ports->devr->mutex); 2411 mlx5_ib_gsi_pkey_change(ports->gsi); 2412 mutex_unlock(&ports->devr->mutex); 2413 } 2414 2415 static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev) 2416 { 2417 struct mlx5_ib_qp *mqp; 2418 struct mlx5_ib_cq *send_mcq, *recv_mcq; 2419 struct mlx5_core_cq *mcq; 2420 struct list_head cq_armed_list; 2421 unsigned long flags_qp; 2422 unsigned long flags_cq; 2423 unsigned long flags; 2424 2425 INIT_LIST_HEAD(&cq_armed_list); 2426 2427 /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ 2428 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); 2429 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { 2430 spin_lock_irqsave(&mqp->sq.lock, flags_qp); 2431 if (mqp->sq.tail != mqp->sq.head) { 2432 send_mcq = to_mcq(mqp->ibqp.send_cq); 2433 spin_lock_irqsave(&send_mcq->lock, flags_cq); 2434 if (send_mcq->mcq.comp && 2435 mqp->ibqp.send_cq->comp_handler) { 2436 if (!send_mcq->mcq.reset_notify_added) { 2437 send_mcq->mcq.reset_notify_added = 1; 2438 list_add_tail(&send_mcq->mcq.reset_notify, 2439 &cq_armed_list); 2440 } 2441 } 2442 spin_unlock_irqrestore(&send_mcq->lock, flags_cq); 2443 } 2444 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); 2445 spin_lock_irqsave(&mqp->rq.lock, flags_qp); 2446 /* no handling is needed for SRQ */ 2447 if (!mqp->ibqp.srq) { 2448 if (mqp->rq.tail != mqp->rq.head) { 2449 recv_mcq = to_mcq(mqp->ibqp.recv_cq); 2450 spin_lock_irqsave(&recv_mcq->lock, flags_cq); 2451 if 
(recv_mcq->mcq.comp && 2452 mqp->ibqp.recv_cq->comp_handler) { 2453 if (!recv_mcq->mcq.reset_notify_added) { 2454 recv_mcq->mcq.reset_notify_added = 1; 2455 list_add_tail(&recv_mcq->mcq.reset_notify, 2456 &cq_armed_list); 2457 } 2458 } 2459 spin_unlock_irqrestore(&recv_mcq->lock, 2460 flags_cq); 2461 } 2462 } 2463 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); 2464 } 2465 /*At that point all inflight post send were put to be executed as of we 2466 * lock/unlock above locks Now need to arm all involved CQs. 2467 */ 2468 list_for_each_entry(mcq, &cq_armed_list, reset_notify) { 2469 mcq->comp(mcq); 2470 } 2471 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); 2472 } 2473 2474 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, 2475 enum mlx5_dev_event event, unsigned long param) 2476 { 2477 struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context; 2478 struct ib_event ibev; 2479 bool fatal = false; 2480 u8 port = 0; 2481 2482 switch (event) { 2483 case MLX5_DEV_EVENT_SYS_ERROR: 2484 ibev.event = IB_EVENT_DEVICE_FATAL; 2485 mlx5_ib_handle_internal_error(ibdev); 2486 fatal = true; 2487 break; 2488 2489 case MLX5_DEV_EVENT_PORT_UP: 2490 case MLX5_DEV_EVENT_PORT_DOWN: 2491 case MLX5_DEV_EVENT_PORT_INITIALIZED: 2492 port = (u8)param; 2493 2494 /* In RoCE, port up/down events are handled in 2495 * mlx5_netdev_event(). 2496 */ 2497 if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == 2498 IB_LINK_LAYER_ETHERNET) 2499 return; 2500 2501 ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ? 2502 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; 2503 break; 2504 2505 case MLX5_DEV_EVENT_LID_CHANGE: 2506 ibev.event = IB_EVENT_LID_CHANGE; 2507 port = (u8)param; 2508 break; 2509 2510 case MLX5_DEV_EVENT_PKEY_CHANGE: 2511 ibev.event = IB_EVENT_PKEY_CHANGE; 2512 port = (u8)param; 2513 2514 schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work); 2515 break; 2516 2517 case MLX5_DEV_EVENT_GUID_CHANGE: 2518 ibev.event = IB_EVENT_GID_CHANGE; 2519 port = (u8)param; 2520 break; 2521 2522 case MLX5_DEV_EVENT_CLIENT_REREG: 2523 ibev.event = IB_EVENT_CLIENT_REREGISTER; 2524 port = (u8)param; 2525 break; 2526 default: 2527 return; 2528 } 2529 2530 ibev.device = &ibdev->ib_dev; 2531 ibev.element.port_num = port; 2532 2533 if (port < 1 || port > ibdev->num_ports) { 2534 mlx5_ib_warn(ibdev, "warning: event on port %d\n", port); 2535 return; 2536 } 2537 2538 if (ibdev->ib_active) 2539 ib_dispatch_event(&ibev); 2540 2541 if (fatal) 2542 ibdev->ib_active = false; 2543 } 2544 2545 static void get_ext_port_caps(struct mlx5_ib_dev *dev) 2546 { 2547 int port; 2548 2549 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) 2550 mlx5_query_ext_port_caps(dev, port); 2551 } 2552 2553 static int get_port_caps(struct mlx5_ib_dev *dev) 2554 { 2555 struct ib_device_attr *dprops = NULL; 2556 struct ib_port_attr *pprops = NULL; 2557 int err = -ENOMEM; 2558 int port; 2559 struct ib_udata uhw = {.inlen = 0, .outlen = 0}; 2560 2561 pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); 2562 if (!pprops) 2563 goto out; 2564 2565 dprops = kmalloc(sizeof(*dprops), GFP_KERNEL); 2566 if (!dprops) 2567 goto out; 2568 2569 err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw); 2570 if (err) { 2571 mlx5_ib_warn(dev, "query_device failed %d\n", err); 2572 goto out; 2573 } 2574 2575 for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) { 2576 err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); 2577 if (err) { 2578 mlx5_ib_warn(dev, "query_port %d failed %d\n", 2579 port, err); 2580 break; 2581 } 2582 
dev->mdev->port_caps[port - 1].pkey_table_len = 2583 dprops->max_pkeys; 2584 dev->mdev->port_caps[port - 1].gid_table_len = 2585 pprops->gid_tbl_len; 2586 mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", 2587 dprops->max_pkeys, pprops->gid_tbl_len); 2588 } 2589 2590 out: 2591 kfree(pprops); 2592 kfree(dprops); 2593 2594 return err; 2595 } 2596 2597 static void destroy_umrc_res(struct mlx5_ib_dev *dev) 2598 { 2599 int err; 2600 2601 err = mlx5_mr_cache_cleanup(dev); 2602 if (err) 2603 mlx5_ib_warn(dev, "mr cache cleanup failed\n"); 2604 2605 mlx5_ib_destroy_qp(dev->umrc.qp); 2606 ib_free_cq(dev->umrc.cq); 2607 ib_dealloc_pd(dev->umrc.pd); 2608 } 2609 2610 enum { 2611 MAX_UMR_WR = 128, 2612 }; 2613 2614 static int create_umr_res(struct mlx5_ib_dev *dev) 2615 { 2616 struct ib_qp_init_attr *init_attr = NULL; 2617 struct ib_qp_attr *attr = NULL; 2618 struct ib_pd *pd; 2619 struct ib_cq *cq; 2620 struct ib_qp *qp; 2621 int ret; 2622 2623 attr = kzalloc(sizeof(*attr), GFP_KERNEL); 2624 init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL); 2625 if (!attr || !init_attr) { 2626 ret = -ENOMEM; 2627 goto error_0; 2628 } 2629 2630 pd = ib_alloc_pd(&dev->ib_dev, 0); 2631 if (IS_ERR(pd)) { 2632 mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); 2633 ret = PTR_ERR(pd); 2634 goto error_0; 2635 } 2636 2637 cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ); 2638 if (IS_ERR(cq)) { 2639 mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n"); 2640 ret = PTR_ERR(cq); 2641 goto error_2; 2642 } 2643 2644 init_attr->send_cq = cq; 2645 init_attr->recv_cq = cq; 2646 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 2647 init_attr->cap.max_send_wr = MAX_UMR_WR; 2648 init_attr->cap.max_send_sge = 1; 2649 init_attr->qp_type = MLX5_IB_QPT_REG_UMR; 2650 init_attr->port_num = 1; 2651 qp = mlx5_ib_create_qp(pd, init_attr, NULL); 2652 if (IS_ERR(qp)) { 2653 mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); 2654 ret = PTR_ERR(qp); 2655 goto error_3; 2656 } 2657 qp->device = &dev->ib_dev; 2658 qp->real_qp = qp; 2659 qp->uobject = NULL; 2660 qp->qp_type = MLX5_IB_QPT_REG_UMR; 2661 2662 attr->qp_state = IB_QPS_INIT; 2663 attr->port_num = 1; 2664 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | 2665 IB_QP_PORT, NULL); 2666 if (ret) { 2667 mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); 2668 goto error_4; 2669 } 2670 2671 memset(attr, 0, sizeof(*attr)); 2672 attr->qp_state = IB_QPS_RTR; 2673 attr->path_mtu = IB_MTU_256; 2674 2675 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 2676 if (ret) { 2677 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); 2678 goto error_4; 2679 } 2680 2681 memset(attr, 0, sizeof(*attr)); 2682 attr->qp_state = IB_QPS_RTS; 2683 ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); 2684 if (ret) { 2685 mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); 2686 goto error_4; 2687 } 2688 2689 dev->umrc.qp = qp; 2690 dev->umrc.cq = cq; 2691 dev->umrc.pd = pd; 2692 2693 sema_init(&dev->umrc.sem, MAX_UMR_WR); 2694 ret = mlx5_mr_cache_init(dev); 2695 if (ret) { 2696 mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); 2697 goto error_4; 2698 } 2699 2700 kfree(attr); 2701 kfree(init_attr); 2702 2703 return 0; 2704 2705 error_4: 2706 mlx5_ib_destroy_qp(qp); 2707 2708 error_3: 2709 ib_free_cq(cq); 2710 2711 error_2: 2712 ib_dealloc_pd(pd); 2713 2714 error_0: 2715 kfree(attr); 2716 kfree(init_attr); 2717 return ret; 2718 } 2719 2720 static int create_dev_resources(struct mlx5_ib_resources *devr) 2721 { 2722 struct ib_srq_init_attr attr; 2723 struct mlx5_ib_dev *dev; 2724 
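/*
 * Note on create_umr_res() above: the UMR QP is a kernel-internal
 * MLX5_IB_QPT_REG_UMR QP driven through the standard verbs state machine,
 * condensed:
 *
 *	RESET -(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT)-> INIT
 *	INIT  -(IB_QP_STATE, path_mtu = IB_MTU_256)----------> RTR
 *	RTR   -(IB_QP_STATE)---------------------------------> RTS
 *
 * MAX_UMR_WR (128) bounds the number of outstanding UMR work requests via
 * dev->umrc.sem.
 */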
struct ib_cq_init_attr cq_attr = {.cqe = 1};
2725 int port;
2726 int ret = 0;
2727
2728 dev = container_of(devr, struct mlx5_ib_dev, devr);
2729
2730 mutex_init(&devr->mutex);
2731
2732 devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
2733 if (IS_ERR(devr->p0)) {
2734 ret = PTR_ERR(devr->p0);
2735 goto error0;
2736 }
2737 devr->p0->device = &dev->ib_dev;
2738 devr->p0->uobject = NULL;
2739 atomic_set(&devr->p0->usecnt, 0);
2740
2741 devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
2742 if (IS_ERR(devr->c0)) {
2743 ret = PTR_ERR(devr->c0);
2744 goto error1;
2745 }
2746 devr->c0->device = &dev->ib_dev;
2747 devr->c0->uobject = NULL;
2748 devr->c0->comp_handler = NULL;
2749 devr->c0->event_handler = NULL;
2750 devr->c0->cq_context = NULL;
2751 atomic_set(&devr->c0->usecnt, 0);
2752
2753 devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
2754 if (IS_ERR(devr->x0)) {
2755 ret = PTR_ERR(devr->x0);
2756 goto error2;
2757 }
2758 devr->x0->device = &dev->ib_dev;
2759 devr->x0->inode = NULL;
2760 atomic_set(&devr->x0->usecnt, 0);
2761 mutex_init(&devr->x0->tgt_qp_mutex);
2762 INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
2763
2764 devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
2765 if (IS_ERR(devr->x1)) {
2766 ret = PTR_ERR(devr->x1);
2767 goto error3;
2768 }
2769 devr->x1->device = &dev->ib_dev;
2770 devr->x1->inode = NULL;
2771 atomic_set(&devr->x1->usecnt, 0);
2772 mutex_init(&devr->x1->tgt_qp_mutex);
2773 INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
2774
2775 memset(&attr, 0, sizeof(attr));
2776 attr.attr.max_sge = 1;
2777 attr.attr.max_wr = 1;
2778 attr.srq_type = IB_SRQT_XRC;
2779 attr.ext.xrc.cq = devr->c0;
2780 attr.ext.xrc.xrcd = devr->x0;
2781
2782 devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
2783 if (IS_ERR(devr->s0)) {
2784 ret = PTR_ERR(devr->s0);
2785 goto error4;
2786 }
2787 devr->s0->device = &dev->ib_dev;
2788 devr->s0->pd = devr->p0;
2789 devr->s0->uobject = NULL;
2790 devr->s0->event_handler = NULL;
2791 devr->s0->srq_context = NULL;
2792 devr->s0->srq_type = IB_SRQT_XRC;
2793 devr->s0->ext.xrc.xrcd = devr->x0;
2794 devr->s0->ext.xrc.cq = devr->c0;
2795 atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
2796 atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
2797 atomic_inc(&devr->p0->usecnt);
2798 atomic_set(&devr->s0->usecnt, 0);
2799
2800 memset(&attr, 0, sizeof(attr));
2801 attr.attr.max_sge = 1;
2802 attr.attr.max_wr = 1;
2803 attr.srq_type = IB_SRQT_BASIC;
2804 devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
2805 if (IS_ERR(devr->s1)) {
2806 ret = PTR_ERR(devr->s1);
2807 goto error5;
2808 }
2809 devr->s1->device = &dev->ib_dev;
2810 devr->s1->pd = devr->p0;
2811 devr->s1->uobject = NULL;
2812 devr->s1->event_handler = NULL;
2813 devr->s1->srq_context = NULL;
2814 devr->s1->srq_type = IB_SRQT_BASIC;
2815 devr->s1->ext.xrc.cq = devr->c0;
2816 atomic_inc(&devr->p0->usecnt);
2817 atomic_set(&devr->s1->usecnt, 0);
2818
2819 for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
2820 INIT_WORK(&devr->ports[port].pkey_change_work,
2821 pkey_change_handler);
2822 devr->ports[port].devr = devr;
2823 }
2824
2825 return 0;
2826
2827 error5:
2828 mlx5_ib_destroy_srq(devr->s0);
2829 error4:
2830 mlx5_ib_dealloc_xrcd(devr->x1);
2831 error3:
2832 mlx5_ib_dealloc_xrcd(devr->x0);
2833 error2:
2834 mlx5_ib_destroy_cq(devr->c0);
2835 error1:
2836 mlx5_ib_dealloc_pd(devr->p0);
2837 error0:
2838 return ret;
2839 }
2840
2841 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
2842 {
2843 struct mlx5_ib_dev *dev =
2844 container_of(devr, struct
mlx5_ib_dev, devr); 2845 int port; 2846 2847 mlx5_ib_destroy_srq(devr->s1); 2848 mlx5_ib_destroy_srq(devr->s0); 2849 mlx5_ib_dealloc_xrcd(devr->x0); 2850 mlx5_ib_dealloc_xrcd(devr->x1); 2851 mlx5_ib_destroy_cq(devr->c0); 2852 mlx5_ib_dealloc_pd(devr->p0); 2853 2854 /* Make sure no change P_Key work items are still executing */ 2855 for (port = 0; port < dev->num_ports; ++port) 2856 cancel_work_sync(&devr->ports[port].pkey_change_work); 2857 } 2858 2859 static u32 get_core_cap_flags(struct ib_device *ibdev) 2860 { 2861 struct mlx5_ib_dev *dev = to_mdev(ibdev); 2862 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1); 2863 u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type); 2864 u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version); 2865 u32 ret = 0; 2866 2867 if (ll == IB_LINK_LAYER_INFINIBAND) 2868 return RDMA_CORE_PORT_IBA_IB; 2869 2870 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP)) 2871 return 0; 2872 2873 if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP)) 2874 return 0; 2875 2876 if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP) 2877 ret |= RDMA_CORE_PORT_IBA_ROCE; 2878 2879 if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP) 2880 ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; 2881 2882 return ret; 2883 } 2884 2885 static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, 2886 struct ib_port_immutable *immutable) 2887 { 2888 struct ib_port_attr attr; 2889 struct mlx5_ib_dev *dev = to_mdev(ibdev); 2890 enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); 2891 int err; 2892 2893 err = mlx5_ib_query_port(ibdev, port_num, &attr); 2894 if (err) 2895 return err; 2896 2897 immutable->pkey_tbl_len = attr.pkey_tbl_len; 2898 immutable->gid_tbl_len = attr.gid_tbl_len; 2899 immutable->core_cap_flags = get_core_cap_flags(ibdev); 2900 if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) 2901 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 2902 2903 return 0; 2904 } 2905 2906 static void get_dev_fw_str(struct ib_device *ibdev, char *str, 2907 size_t str_len) 2908 { 2909 struct mlx5_ib_dev *dev = 2910 container_of(ibdev, struct mlx5_ib_dev, ib_dev); 2911 snprintf(str, str_len, "%d.%d.%04d", fw_rev_maj(dev->mdev), 2912 fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); 2913 } 2914 2915 static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) 2916 { 2917 struct mlx5_core_dev *mdev = dev->mdev; 2918 struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, 2919 MLX5_FLOW_NAMESPACE_LAG); 2920 struct mlx5_flow_table *ft; 2921 int err; 2922 2923 if (!ns || !mlx5_lag_is_active(mdev)) 2924 return 0; 2925 2926 err = mlx5_cmd_create_vport_lag(mdev); 2927 if (err) 2928 return err; 2929 2930 ft = mlx5_create_lag_demux_flow_table(ns, 0, 0); 2931 if (IS_ERR(ft)) { 2932 err = PTR_ERR(ft); 2933 goto err_destroy_vport_lag; 2934 } 2935 2936 dev->flow_db.lag_demux_ft = ft; 2937 return 0; 2938 2939 err_destroy_vport_lag: 2940 mlx5_cmd_destroy_vport_lag(mdev); 2941 return err; 2942 } 2943 2944 static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) 2945 { 2946 struct mlx5_core_dev *mdev = dev->mdev; 2947 2948 if (dev->flow_db.lag_demux_ft) { 2949 mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft); 2950 dev->flow_db.lag_demux_ft = NULL; 2951 2952 mlx5_cmd_destroy_vport_lag(mdev); 2953 } 2954 } 2955 2956 static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev) 2957 { 2958 int err; 2959 2960 dev->roce.nb.notifier_call = mlx5_netdev_event; 2961 err = register_netdevice_notifier(&dev->roce.nb); 2962 if (err) { 2963 dev->roce.nb.notifier_call = NULL; 2964 return err; 2965 } 2966 
2967 return 0; 2968 } 2969 2970 static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev) 2971 { 2972 if (dev->roce.nb.notifier_call) { 2973 unregister_netdevice_notifier(&dev->roce.nb); 2974 dev->roce.nb.notifier_call = NULL; 2975 } 2976 } 2977 2978 static int mlx5_enable_eth(struct mlx5_ib_dev *dev) 2979 { 2980 int err; 2981 2982 err = mlx5_add_netdev_notifier(dev); 2983 if (err) 2984 return err; 2985 2986 if (MLX5_CAP_GEN(dev->mdev, roce)) { 2987 err = mlx5_nic_vport_enable_roce(dev->mdev); 2988 if (err) 2989 goto err_unregister_netdevice_notifier; 2990 } 2991 2992 err = mlx5_eth_lag_init(dev); 2993 if (err) 2994 goto err_disable_roce; 2995 2996 return 0; 2997 2998 err_disable_roce: 2999 if (MLX5_CAP_GEN(dev->mdev, roce)) 3000 mlx5_nic_vport_disable_roce(dev->mdev); 3001 3002 err_unregister_netdevice_notifier: 3003 mlx5_remove_netdev_notifier(dev); 3004 return err; 3005 } 3006 3007 static void mlx5_disable_eth(struct mlx5_ib_dev *dev) 3008 { 3009 mlx5_eth_lag_cleanup(dev); 3010 if (MLX5_CAP_GEN(dev->mdev, roce)) 3011 mlx5_nic_vport_disable_roce(dev->mdev); 3012 } 3013 3014 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) 3015 { 3016 unsigned int i; 3017 3018 for (i = 0; i < dev->num_ports; i++) 3019 mlx5_core_dealloc_q_counter(dev->mdev, 3020 dev->port[i].q_cnt_id); 3021 } 3022 3023 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev) 3024 { 3025 int i; 3026 int ret; 3027 3028 for (i = 0; i < dev->num_ports; i++) { 3029 ret = mlx5_core_alloc_q_counter(dev->mdev, 3030 &dev->port[i].q_cnt_id); 3031 if (ret) { 3032 mlx5_ib_warn(dev, 3033 "couldn't allocate queue counter for port %d, err %d\n", 3034 i + 1, ret); 3035 goto dealloc_counters; 3036 } 3037 } 3038 3039 return 0; 3040 3041 dealloc_counters: 3042 while (--i >= 0) 3043 mlx5_core_dealloc_q_counter(dev->mdev, 3044 dev->port[i].q_cnt_id); 3045 3046 return ret; 3047 } 3048 3049 static const char * const names[] = { 3050 "rx_write_requests", 3051 "rx_read_requests", 3052 "rx_atomic_requests", 3053 "out_of_buffer", 3054 "out_of_sequence", 3055 "duplicate_request", 3056 "rnr_nak_retry_err", 3057 "packet_seq_err", 3058 "implied_nak_seq_err", 3059 "local_ack_timeout_err", 3060 }; 3061 3062 static const size_t stats_offsets[] = { 3063 MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests), 3064 MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests), 3065 MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests), 3066 MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer), 3067 MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence), 3068 MLX5_BYTE_OFF(query_q_counter_out, duplicate_request), 3069 MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err), 3070 MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err), 3071 MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err), 3072 MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err), 3073 }; 3074 3075 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev, 3076 u8 port_num) 3077 { 3078 BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets)); 3079 3080 /* We support only per port stats */ 3081 if (port_num == 0) 3082 return NULL; 3083 3084 return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names), 3085 RDMA_HW_STATS_DEFAULT_LIFESPAN); 3086 } 3087 3088 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev, 3089 struct rdma_hw_stats *stats, 3090 u8 port, int index) 3091 { 3092 struct mlx5_ib_dev *dev = to_mdev(ibdev); 3093 int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); 3094 void *out; 3095 __be32 val; 3096 int ret; 3097 int i; 3098 3099 if (!port || 
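/*
 * Illustration of the counter plumbing above: names[i] (exported through
 * rdma_alloc_hw_stats_struct()) and stats_offsets[i] (a byte offset into the
 * QUERY_Q_COUNTER output mailbox) must stay index-aligned, which the
 * BUILD_BUG_ON() enforces.  The readout loop below reduces to:
 *
 *	__be32 raw = *(__be32 *)(out + stats_offsets[i]);
 *	stats->value[i] = (u64)be32_to_cpu(raw);     value reported as names[i]
 *
 * so adding a counter means extending both arrays in lockstep.
 */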
!stats) 3100 return -ENOSYS; 3101 3102 out = mlx5_vzalloc(outlen); 3103 if (!out) 3104 return -ENOMEM; 3105 3106 ret = mlx5_core_query_q_counter(dev->mdev, 3107 dev->port[port - 1].q_cnt_id, 0, 3108 out, outlen); 3109 if (ret) 3110 goto free; 3111 3112 for (i = 0; i < ARRAY_SIZE(names); i++) { 3113 val = *(__be32 *)(out + stats_offsets[i]); 3114 stats->value[i] = (u64)be32_to_cpu(val); 3115 } 3116 free: 3117 kvfree(out); 3118 return ARRAY_SIZE(names); 3119 } 3120 3121 static void *mlx5_ib_add(struct mlx5_core_dev *mdev) 3122 { 3123 struct mlx5_ib_dev *dev; 3124 enum rdma_link_layer ll; 3125 int port_type_cap; 3126 const char *name; 3127 int err; 3128 int i; 3129 3130 port_type_cap = MLX5_CAP_GEN(mdev, port_type); 3131 ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); 3132 3133 printk_once(KERN_INFO "%s", mlx5_version); 3134 3135 dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); 3136 if (!dev) 3137 return NULL; 3138 3139 dev->mdev = mdev; 3140 3141 dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port), 3142 GFP_KERNEL); 3143 if (!dev->port) 3144 goto err_dealloc; 3145 3146 rwlock_init(&dev->roce.netdev_lock); 3147 err = get_port_caps(dev); 3148 if (err) 3149 goto err_free_port; 3150 3151 if (mlx5_use_mad_ifc(dev)) 3152 get_ext_port_caps(dev); 3153 3154 if (!mlx5_lag_is_active(mdev)) 3155 name = "mlx5_%d"; 3156 else 3157 name = "mlx5_bond_%d"; 3158 3159 strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX); 3160 dev->ib_dev.owner = THIS_MODULE; 3161 dev->ib_dev.node_type = RDMA_NODE_IB_CA; 3162 dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; 3163 dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); 3164 dev->ib_dev.phys_port_cnt = dev->num_ports; 3165 dev->ib_dev.num_comp_vectors = 3166 dev->mdev->priv.eq_table.num_comp_vectors; 3167 dev->ib_dev.dma_device = &mdev->pdev->dev; 3168 3169 dev->ib_dev.uverbs_abi_ver = MLX5_IB_UVERBS_ABI_VERSION; 3170 dev->ib_dev.uverbs_cmd_mask = 3171 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 3172 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 3173 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 3174 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 3175 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 3176 (1ull << IB_USER_VERBS_CMD_CREATE_AH) | 3177 (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | 3178 (1ull << IB_USER_VERBS_CMD_REG_MR) | 3179 (1ull << IB_USER_VERBS_CMD_REREG_MR) | 3180 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 3181 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 3182 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 3183 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 3184 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 3185 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 3186 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 3187 (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 3188 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 3189 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 3190 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 3191 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 3192 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 3193 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 3194 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 3195 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | 3196 (1ull << IB_USER_VERBS_CMD_OPEN_QP); 3197 dev->ib_dev.uverbs_ex_cmd_mask = 3198 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | 3199 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | 3200 (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | 3201 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); 3202 3203 dev->ib_dev.query_device = mlx5_ib_query_device; 3204 dev->ib_dev.query_port = mlx5_ib_query_port; 3205 dev->ib_dev.get_link_layer = mlx5_ib_port_link_layer; 3206 if (ll == 
IB_LINK_LAYER_ETHERNET) 3207 dev->ib_dev.get_netdev = mlx5_ib_get_netdev; 3208 dev->ib_dev.query_gid = mlx5_ib_query_gid; 3209 dev->ib_dev.add_gid = mlx5_ib_add_gid; 3210 dev->ib_dev.del_gid = mlx5_ib_del_gid; 3211 dev->ib_dev.query_pkey = mlx5_ib_query_pkey; 3212 dev->ib_dev.modify_device = mlx5_ib_modify_device; 3213 dev->ib_dev.modify_port = mlx5_ib_modify_port; 3214 dev->ib_dev.alloc_ucontext = mlx5_ib_alloc_ucontext; 3215 dev->ib_dev.dealloc_ucontext = mlx5_ib_dealloc_ucontext; 3216 dev->ib_dev.mmap = mlx5_ib_mmap; 3217 dev->ib_dev.alloc_pd = mlx5_ib_alloc_pd; 3218 dev->ib_dev.dealloc_pd = mlx5_ib_dealloc_pd; 3219 dev->ib_dev.create_ah = mlx5_ib_create_ah; 3220 dev->ib_dev.query_ah = mlx5_ib_query_ah; 3221 dev->ib_dev.destroy_ah = mlx5_ib_destroy_ah; 3222 dev->ib_dev.create_srq = mlx5_ib_create_srq; 3223 dev->ib_dev.modify_srq = mlx5_ib_modify_srq; 3224 dev->ib_dev.query_srq = mlx5_ib_query_srq; 3225 dev->ib_dev.destroy_srq = mlx5_ib_destroy_srq; 3226 dev->ib_dev.post_srq_recv = mlx5_ib_post_srq_recv; 3227 dev->ib_dev.create_qp = mlx5_ib_create_qp; 3228 dev->ib_dev.modify_qp = mlx5_ib_modify_qp; 3229 dev->ib_dev.query_qp = mlx5_ib_query_qp; 3230 dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp; 3231 dev->ib_dev.post_send = mlx5_ib_post_send; 3232 dev->ib_dev.post_recv = mlx5_ib_post_recv; 3233 dev->ib_dev.create_cq = mlx5_ib_create_cq; 3234 dev->ib_dev.modify_cq = mlx5_ib_modify_cq; 3235 dev->ib_dev.resize_cq = mlx5_ib_resize_cq; 3236 dev->ib_dev.destroy_cq = mlx5_ib_destroy_cq; 3237 dev->ib_dev.poll_cq = mlx5_ib_poll_cq; 3238 dev->ib_dev.req_notify_cq = mlx5_ib_arm_cq; 3239 dev->ib_dev.get_dma_mr = mlx5_ib_get_dma_mr; 3240 dev->ib_dev.reg_user_mr = mlx5_ib_reg_user_mr; 3241 dev->ib_dev.rereg_user_mr = mlx5_ib_rereg_user_mr; 3242 dev->ib_dev.dereg_mr = mlx5_ib_dereg_mr; 3243 dev->ib_dev.attach_mcast = mlx5_ib_mcg_attach; 3244 dev->ib_dev.detach_mcast = mlx5_ib_mcg_detach; 3245 dev->ib_dev.process_mad = mlx5_ib_process_mad; 3246 dev->ib_dev.alloc_mr = mlx5_ib_alloc_mr; 3247 dev->ib_dev.map_mr_sg = mlx5_ib_map_mr_sg; 3248 dev->ib_dev.check_mr_status = mlx5_ib_check_mr_status; 3249 dev->ib_dev.get_port_immutable = mlx5_port_immutable; 3250 dev->ib_dev.get_dev_fw_str = get_dev_fw_str; 3251 if (mlx5_core_is_pf(mdev)) { 3252 dev->ib_dev.get_vf_config = mlx5_ib_get_vf_config; 3253 dev->ib_dev.set_vf_link_state = mlx5_ib_set_vf_link_state; 3254 dev->ib_dev.get_vf_stats = mlx5_ib_get_vf_stats; 3255 dev->ib_dev.set_vf_guid = mlx5_ib_set_vf_guid; 3256 } 3257 3258 dev->ib_dev.disassociate_ucontext = mlx5_ib_disassociate_ucontext; 3259 3260 mlx5_ib_internal_fill_odp_caps(dev); 3261 3262 if (MLX5_CAP_GEN(mdev, imaicl)) { 3263 dev->ib_dev.alloc_mw = mlx5_ib_alloc_mw; 3264 dev->ib_dev.dealloc_mw = mlx5_ib_dealloc_mw; 3265 dev->ib_dev.uverbs_cmd_mask |= 3266 (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | 3267 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 3268 } 3269 3270 if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) && 3271 MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) { 3272 dev->ib_dev.get_hw_stats = mlx5_ib_get_hw_stats; 3273 dev->ib_dev.alloc_hw_stats = mlx5_ib_alloc_hw_stats; 3274 } 3275 3276 if (MLX5_CAP_GEN(mdev, xrc)) { 3277 dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd; 3278 dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd; 3279 dev->ib_dev.uverbs_cmd_mask |= 3280 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | 3281 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); 3282 } 3283 3284 if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) == 3285 IB_LINK_LAYER_ETHERNET) { 3286 dev->ib_dev.create_flow = mlx5_ib_create_flow; 3287 
dev->ib_dev.destroy_flow = mlx5_ib_destroy_flow; 3288 dev->ib_dev.create_wq = mlx5_ib_create_wq; 3289 dev->ib_dev.modify_wq = mlx5_ib_modify_wq; 3290 dev->ib_dev.destroy_wq = mlx5_ib_destroy_wq; 3291 dev->ib_dev.create_rwq_ind_table = mlx5_ib_create_rwq_ind_table; 3292 dev->ib_dev.destroy_rwq_ind_table = mlx5_ib_destroy_rwq_ind_table; 3293 dev->ib_dev.uverbs_ex_cmd_mask |= 3294 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | 3295 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW) | 3296 (1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) | 3297 (1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) | 3298 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | 3299 (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | 3300 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); 3301 } 3302 err = init_node_data(dev); 3303 if (err) 3304 goto err_free_port; 3305 3306 mutex_init(&dev->flow_db.lock); 3307 mutex_init(&dev->cap_mask_mutex); 3308 INIT_LIST_HEAD(&dev->qp_list); 3309 spin_lock_init(&dev->reset_flow_resource_lock); 3310 3311 if (ll == IB_LINK_LAYER_ETHERNET) { 3312 err = mlx5_enable_eth(dev); 3313 if (err) 3314 goto err_free_port; 3315 } 3316 3317 err = create_dev_resources(&dev->devr); 3318 if (err) 3319 goto err_disable_eth; 3320 3321 err = mlx5_ib_odp_init_one(dev); 3322 if (err) 3323 goto err_rsrc; 3324 3325 err = mlx5_ib_alloc_q_counters(dev); 3326 if (err) 3327 goto err_odp; 3328 3329 dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev); 3330 if (!dev->mdev->priv.uar) 3331 goto err_q_cnt; 3332 3333 err = mlx5_alloc_bfreg(dev->mdev, &dev->bfreg, false, false); 3334 if (err) 3335 goto err_uar_page; 3336 3337 err = mlx5_alloc_bfreg(dev->mdev, &dev->fp_bfreg, false, true); 3338 if (err) 3339 goto err_bfreg; 3340 3341 err = ib_register_device(&dev->ib_dev, NULL); 3342 if (err) 3343 goto err_fp_bfreg; 3344 3345 err = create_umr_res(dev); 3346 if (err) 3347 goto err_dev; 3348 3349 for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) { 3350 err = device_create_file(&dev->ib_dev.dev, 3351 mlx5_class_attributes[i]); 3352 if (err) 3353 goto err_umrc; 3354 } 3355 3356 dev->ib_active = true; 3357 3358 return dev; 3359 3360 err_umrc: 3361 destroy_umrc_res(dev); 3362 3363 err_dev: 3364 ib_unregister_device(&dev->ib_dev); 3365 3366 err_fp_bfreg: 3367 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 3368 3369 err_bfreg: 3370 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 3371 3372 err_uar_page: 3373 mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar); 3374 3375 err_q_cnt: 3376 mlx5_ib_dealloc_q_counters(dev); 3377 3378 err_odp: 3379 mlx5_ib_odp_remove_one(dev); 3380 3381 err_rsrc: 3382 destroy_dev_resources(&dev->devr); 3383 3384 err_disable_eth: 3385 if (ll == IB_LINK_LAYER_ETHERNET) { 3386 mlx5_disable_eth(dev); 3387 mlx5_remove_netdev_notifier(dev); 3388 } 3389 3390 err_free_port: 3391 kfree(dev->port); 3392 3393 err_dealloc: 3394 ib_dealloc_device((struct ib_device *)dev); 3395 3396 return NULL; 3397 } 3398 3399 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) 3400 { 3401 struct mlx5_ib_dev *dev = context; 3402 enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); 3403 3404 mlx5_remove_netdev_notifier(dev); 3405 ib_unregister_device(&dev->ib_dev); 3406 mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg); 3407 mlx5_free_bfreg(dev->mdev, &dev->bfreg); 3408 mlx5_put_uars_page(dev->mdev, mdev->priv.uar); 3409 mlx5_ib_dealloc_q_counters(dev); 3410 destroy_umrc_res(dev); 3411 mlx5_ib_odp_remove_one(dev); 3412 destroy_dev_resources(&dev->devr); 3413 if (ll == IB_LINK_LAYER_ETHERNET) 3414 mlx5_disable_eth(dev); 3415 kfree(dev->port); 
3416 ib_dealloc_device(&dev->ib_dev); 3417 } 3418 3419 static struct mlx5_interface mlx5_ib_interface = { 3420 .add = mlx5_ib_add, 3421 .remove = mlx5_ib_remove, 3422 .event = mlx5_ib_event, 3423 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING 3424 .pfault = mlx5_ib_pfault, 3425 #endif 3426 .protocol = MLX5_INTERFACE_PROTOCOL_IB, 3427 }; 3428 3429 static int __init mlx5_ib_init(void) 3430 { 3431 int err; 3432 3433 if (deprecated_prof_sel != 2) 3434 pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n"); 3435 3436 err = mlx5_register_interface(&mlx5_ib_interface); 3437 3438 return err; 3439 } 3440 3441 static void __exit mlx5_ib_cleanup(void) 3442 { 3443 mlx5_unregister_interface(&mlx5_ib_interface); 3444 } 3445 3446 module_init(mlx5_ib_init); 3447 module_exit(mlx5_ib_cleanup); 3448
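/*
 * Usage note (sketch only, hypothetical names): mlx5_ib attaches to mlx5_core
 * through the mlx5_interface contract shown above.  Another protocol client
 * would follow the same pattern:
 *
 *	static struct mlx5_interface my_iface = {
 *		.add      = my_add,        returns a per-device context or NULL
 *		.remove   = my_remove,
 *		.event    = my_event,
 *		.protocol = MLX5_INTERFACE_PROTOCOL_IB,
 *	};
 *
 *	mlx5_register_interface(&my_iface) from module_init() and
 *	mlx5_unregister_interface(&my_iface) from module_exit(), exactly as
 *	mlx5_ib_init()/mlx5_ib_cleanup() do for mlx5_ib_interface.
 *
 * mlx5_core then invokes .add()/.remove() for every mlx5 PCI function as it
 * comes and goes.
 */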