1 /* 2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. 3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #include <linux/module.h> 35 #include <linux/init.h> 36 #include <linux/slab.h> 37 #include <linux/errno.h> 38 #include <linux/netdevice.h> 39 #include <linux/inetdevice.h> 40 #include <linux/rtnetlink.h> 41 #include <linux/if_vlan.h> 42 #include <net/ipv6.h> 43 #include <net/addrconf.h> 44 45 #include <rdma/ib_smi.h> 46 #include <rdma/ib_user_verbs.h> 47 #include <rdma/ib_addr.h> 48 #include <rdma/ib_cache.h> 49 50 #include <net/bonding.h> 51 52 #include <linux/mlx4/driver.h> 53 #include <linux/mlx4/cmd.h> 54 #include <linux/mlx4/qp.h> 55 56 #include "mlx4_ib.h" 57 #include "user.h" 58 59 #define DRV_NAME MLX4_IB_DRV_NAME 60 #define DRV_VERSION "2.2-1" 61 #define DRV_RELDATE "Feb 2014" 62 63 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF 64 #define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF 65 #define MLX4_IB_CARD_REV_A0 0xA0 66 67 MODULE_AUTHOR("Roland Dreier"); 68 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver"); 69 MODULE_LICENSE("Dual BSD/GPL"); 70 MODULE_VERSION(DRV_VERSION); 71 72 int mlx4_ib_sm_guid_assign = 0; 73 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444); 74 MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)"); 75 76 static const char mlx4_ib_version[] = 77 DRV_NAME ": Mellanox ConnectX InfiniBand driver v" 78 DRV_VERSION " (" DRV_RELDATE ")\n"; 79 80 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init); 81 82 static struct workqueue_struct *wq; 83 84 static void init_query_mad(struct ib_smp *mad) 85 { 86 mad->base_version = 1; 87 mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED; 88 mad->class_version = 1; 89 mad->method = IB_MGMT_METHOD_GET; 90 } 91 92 static int check_flow_steering_support(struct mlx4_dev *dev) 93 { 94 int eth_num_ports = 0; 95 int ib_num_ports = 0; 96 97 int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED; 98 99 if (dmfs) { 100 int i; 101 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) 102 eth_num_ports++; 103 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 104 
ib_num_ports++; 105 dmfs &= (!ib_num_ports || 106 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) && 107 (!eth_num_ports || 108 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN)); 109 if (ib_num_ports && mlx4_is_mfunc(dev)) { 110 pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n"); 111 dmfs = 0; 112 } 113 } 114 return dmfs; 115 } 116 117 static int num_ib_ports(struct mlx4_dev *dev) 118 { 119 int ib_ports = 0; 120 int i; 121 122 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 123 ib_ports++; 124 125 return ib_ports; 126 } 127 128 static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num) 129 { 130 struct mlx4_ib_dev *ibdev = to_mdev(device); 131 struct net_device *dev; 132 133 rcu_read_lock(); 134 dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num); 135 136 if (dev) { 137 if (mlx4_is_bonded(ibdev->dev)) { 138 struct net_device *upper = NULL; 139 140 upper = netdev_master_upper_dev_get_rcu(dev); 141 if (upper) { 142 struct net_device *active; 143 144 active = bond_option_active_slave_get_rcu(netdev_priv(upper)); 145 if (active) 146 dev = active; 147 } 148 } 149 } 150 if (dev) 151 dev_hold(dev); 152 153 rcu_read_unlock(); 154 return dev; 155 } 156 157 static int mlx4_ib_update_gids(struct gid_entry *gids, 158 struct mlx4_ib_dev *ibdev, 159 u8 port_num) 160 { 161 struct mlx4_cmd_mailbox *mailbox; 162 int err; 163 struct mlx4_dev *dev = ibdev->dev; 164 int i; 165 union ib_gid *gid_tbl; 166 167 mailbox = mlx4_alloc_cmd_mailbox(dev); 168 if (IS_ERR(mailbox)) 169 return -ENOMEM; 170 171 gid_tbl = mailbox->buf; 172 173 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) 174 memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid)); 175 176 err = mlx4_cmd(dev, mailbox->dma, 177 MLX4_SET_PORT_GID_TABLE << 8 | port_num, 178 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, 179 MLX4_CMD_WRAPPED); 180 if (mlx4_is_bonded(dev)) 181 err += mlx4_cmd(dev, mailbox->dma, 182 MLX4_SET_PORT_GID_TABLE << 8 | 2, 183 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, 184 MLX4_CMD_WRAPPED); 185 186 mlx4_free_cmd_mailbox(dev, mailbox); 187 return err; 188 } 189 190 static int mlx4_ib_add_gid(struct ib_device *device, 191 u8 port_num, 192 unsigned int index, 193 const union ib_gid *gid, 194 const struct ib_gid_attr *attr, 195 void **context) 196 { 197 struct mlx4_ib_dev *ibdev = to_mdev(device); 198 struct mlx4_ib_iboe *iboe = &ibdev->iboe; 199 struct mlx4_port_gid_table *port_gid_table; 200 int free = -1, found = -1; 201 int ret = 0; 202 int hw_update = 0; 203 int i; 204 struct gid_entry *gids = NULL; 205 206 if (!rdma_cap_roce_gid_table(device, port_num)) 207 return -EINVAL; 208 209 if (port_num > MLX4_MAX_PORTS) 210 return -EINVAL; 211 212 if (!context) 213 return -EINVAL; 214 215 port_gid_table = &iboe->gids[port_num - 1]; 216 spin_lock_bh(&iboe->lock); 217 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) { 218 if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid))) { 219 found = i; 220 break; 221 } 222 if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid))) 223 free = i; /* HW has space */ 224 } 225 226 if (found < 0) { 227 if (free < 0) { 228 ret = -ENOSPC; 229 } else { 230 port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC); 231 if (!port_gid_table->gids[free].ctx) { 232 ret = -ENOMEM; 233 } else { 234 *context = port_gid_table->gids[free].ctx; 235 memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid)); 236 port_gid_table->gids[free].ctx->real_index = free; 237 
port_gid_table->gids[free].ctx->refcount = 1; 238 hw_update = 1; 239 } 240 } 241 } else { 242 struct gid_cache_context *ctx = port_gid_table->gids[found].ctx; 243 *context = ctx; 244 ctx->refcount++; 245 } 246 if (!ret && hw_update) { 247 gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC); 248 if (!gids) { 249 ret = -ENOMEM; 250 } else { 251 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) 252 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); 253 } 254 } 255 spin_unlock_bh(&iboe->lock); 256 257 if (!ret && hw_update) { 258 ret = mlx4_ib_update_gids(gids, ibdev, port_num); 259 kfree(gids); 260 } 261 262 return ret; 263 } 264 265 static int mlx4_ib_del_gid(struct ib_device *device, 266 u8 port_num, 267 unsigned int index, 268 void **context) 269 { 270 struct gid_cache_context *ctx = *context; 271 struct mlx4_ib_dev *ibdev = to_mdev(device); 272 struct mlx4_ib_iboe *iboe = &ibdev->iboe; 273 struct mlx4_port_gid_table *port_gid_table; 274 int ret = 0; 275 int hw_update = 0; 276 struct gid_entry *gids = NULL; 277 278 if (!rdma_cap_roce_gid_table(device, port_num)) 279 return -EINVAL; 280 281 if (port_num > MLX4_MAX_PORTS) 282 return -EINVAL; 283 284 port_gid_table = &iboe->gids[port_num - 1]; 285 spin_lock_bh(&iboe->lock); 286 if (ctx) { 287 ctx->refcount--; 288 if (!ctx->refcount) { 289 unsigned int real_index = ctx->real_index; 290 291 memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid)); 292 kfree(port_gid_table->gids[real_index].ctx); 293 port_gid_table->gids[real_index].ctx = NULL; 294 hw_update = 1; 295 } 296 } 297 if (!ret && hw_update) { 298 int i; 299 300 gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC); 301 if (!gids) { 302 ret = -ENOMEM; 303 } else { 304 for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) 305 memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid)); 306 } 307 } 308 spin_unlock_bh(&iboe->lock); 309 310 if (!ret && hw_update) { 311 ret = mlx4_ib_update_gids(gids, ibdev, port_num); 312 kfree(gids); 313 } 314 return ret; 315 } 316 317 int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, 318 u8 port_num, int index) 319 { 320 struct mlx4_ib_iboe *iboe = &ibdev->iboe; 321 struct gid_cache_context *ctx = NULL; 322 union ib_gid gid; 323 struct mlx4_port_gid_table *port_gid_table; 324 int real_index = -EINVAL; 325 int i; 326 int ret; 327 unsigned long flags; 328 329 if (port_num > MLX4_MAX_PORTS) 330 return -EINVAL; 331 332 if (mlx4_is_bonded(ibdev->dev)) 333 port_num = 1; 334 335 if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num)) 336 return index; 337 338 ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid); 339 if (ret) 340 return ret; 341 342 if (!memcmp(&gid, &zgid, sizeof(gid))) 343 return -EINVAL; 344 345 spin_lock_irqsave(&iboe->lock, flags); 346 port_gid_table = &iboe->gids[port_num - 1]; 347 348 for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) 349 if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid))) { 350 ctx = port_gid_table->gids[i].ctx; 351 break; 352 } 353 if (ctx) 354 real_index = ctx->real_index; 355 spin_unlock_irqrestore(&iboe->lock, flags); 356 return real_index; 357 } 358 359 static int mlx4_ib_query_device(struct ib_device *ibdev, 360 struct ib_device_attr *props, 361 struct ib_udata *uhw) 362 { 363 struct mlx4_ib_dev *dev = to_mdev(ibdev); 364 struct ib_smp *in_mad = NULL; 365 struct ib_smp *out_mad = NULL; 366 int err = -ENOMEM; 367 int have_ib_ports; 368 struct mlx4_uverbs_ex_query_device cmd; 369 struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0}; 370 
struct mlx4_clock_params clock_params; 371 372 if (uhw->inlen) { 373 if (uhw->inlen < sizeof(cmd)) 374 return -EINVAL; 375 376 err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd)); 377 if (err) 378 return err; 379 380 if (cmd.comp_mask) 381 return -EINVAL; 382 383 if (cmd.reserved) 384 return -EINVAL; 385 } 386 387 resp.response_length = offsetof(typeof(resp), response_length) + 388 sizeof(resp.response_length); 389 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 390 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 391 if (!in_mad || !out_mad) 392 goto out; 393 394 init_query_mad(in_mad); 395 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 396 397 err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, 398 1, NULL, NULL, in_mad, out_mad); 399 if (err) 400 goto out; 401 402 memset(props, 0, sizeof *props); 403 404 have_ib_ports = num_ib_ports(dev->dev); 405 406 props->fw_ver = dev->dev->caps.fw_ver; 407 props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT | 408 IB_DEVICE_PORT_ACTIVE_EVENT | 409 IB_DEVICE_SYS_IMAGE_GUID | 410 IB_DEVICE_RC_RNR_NAK_GEN | 411 IB_DEVICE_BLOCK_MULTICAST_LOOPBACK; 412 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR) 413 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR; 414 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR) 415 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR; 416 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports) 417 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG; 418 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT) 419 props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; 420 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM) 421 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM; 422 if (dev->dev->caps.max_gso_sz && 423 (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) && 424 (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)) 425 props->device_cap_flags |= IB_DEVICE_UD_TSO; 426 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY) 427 props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY; 428 if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) && 429 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && 430 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) 431 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 432 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) 433 props->device_cap_flags |= IB_DEVICE_XRC; 434 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW) 435 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW; 436 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { 437 if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B) 438 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B; 439 else 440 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A; 441 if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) 442 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING; 443 } 444 445 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 446 0xffffff; 447 props->vendor_part_id = dev->dev->persist->pdev->device; 448 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); 449 memcpy(&props->sys_image_guid, out_mad->data + 4, 8); 450 451 props->max_mr_size = ~0ull; 452 props->page_size_cap = dev->dev->caps.page_size_cap; 453 props->max_qp = dev->dev->quotas.qp; 454 props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; 455 props->max_sge = min(dev->dev->caps.max_sq_sg, 456 dev->dev->caps.max_rq_sg); 457 props->max_sge_rd = props->max_sge; 458 props->max_cq = dev->dev->quotas.cq; 459 props->max_cqe = 
dev->dev->caps.max_cqes;
	props->max_mr = dev->dev->quotas.mpt;
	props->max_pd = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
	props->max_qp_rd_atom = dev->dev->caps.max_qp_dest_rdma;
	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
	props->max_res_rd_atom = props->max_qp_rd_atom * props->max_qp;
	props->max_srq = dev->dev->quotas.srq;
	props->max_srq_wr = dev->dev->caps.max_srq_wqes - 1;
	props->max_srq_sge = dev->dev->caps.max_srq_sge;
	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
	props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
	props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
	props->masked_atomic_cap = props->atomic_cap;
	props->max_pkeys = dev->dev->caps.pkey_table_len[1];
	props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
					   props->max_mcast_grp;
	props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
	props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
	props->timestamp_mask = 0xFFFFFFFFFFFFULL;

	if (!mlx4_is_slave(dev->dev))
		err = mlx4_get_internal_clock_params(dev->dev, &clock_params);

	if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
		resp.response_length += sizeof(resp.hca_core_clock_offset);
		if (!err && !mlx4_is_slave(dev->dev)) {
			resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
			resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
		}
	}

	if (uhw->outlen) {
		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
		if (err)
			goto out;
	}
out:
	kfree(in_mad);
	kfree(out_mad);

	return err;
}

static enum rdma_link_layer
mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
{
	struct mlx4_dev *dev = to_mdev(device)->dev;

	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
511 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; 512 } 513 514 static int ib_link_query_port(struct ib_device *ibdev, u8 port, 515 struct ib_port_attr *props, int netw_view) 516 { 517 struct ib_smp *in_mad = NULL; 518 struct ib_smp *out_mad = NULL; 519 int ext_active_speed; 520 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; 521 int err = -ENOMEM; 522 523 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 524 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 525 if (!in_mad || !out_mad) 526 goto out; 527 528 init_query_mad(in_mad); 529 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 530 in_mad->attr_mod = cpu_to_be32(port); 531 532 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) 533 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; 534 535 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, 536 in_mad, out_mad); 537 if (err) 538 goto out; 539 540 541 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); 542 props->lmc = out_mad->data[34] & 0x7; 543 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); 544 props->sm_sl = out_mad->data[36] & 0xf; 545 props->state = out_mad->data[32] & 0xf; 546 props->phys_state = out_mad->data[33] >> 4; 547 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); 548 if (netw_view) 549 props->gid_tbl_len = out_mad->data[50]; 550 else 551 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; 552 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; 553 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; 554 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); 555 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); 556 props->active_width = out_mad->data[31] & 0xf; 557 props->active_speed = out_mad->data[35] >> 4; 558 props->max_mtu = out_mad->data[41] & 0xf; 559 props->active_mtu = out_mad->data[36] >> 4; 560 props->subnet_timeout = out_mad->data[51] & 0x1f; 561 props->max_vl_num = out_mad->data[37] >> 4; 562 props->init_type_reply = out_mad->data[41] >> 4; 563 564 /* Check if extended speeds (EDR/FDR/...) are supported */ 565 if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) { 566 ext_active_speed = out_mad->data[62] >> 4; 567 568 switch (ext_active_speed) { 569 case 1: 570 props->active_speed = IB_SPEED_FDR; 571 break; 572 case 2: 573 props->active_speed = IB_SPEED_EDR; 574 break; 575 } 576 } 577 578 /* If reported active speed is QDR, check if is FDR-10 */ 579 if (props->active_speed == IB_SPEED_QDR) { 580 init_query_mad(in_mad); 581 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; 582 in_mad->attr_mod = cpu_to_be32(port); 583 584 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, 585 NULL, NULL, in_mad, out_mad); 586 if (err) 587 goto out; 588 589 /* Checking LinkSpeedActive for FDR-10 */ 590 if (out_mad->data[15] & 0x1) 591 props->active_speed = IB_SPEED_FDR10; 592 } 593 594 /* Avoid wrong speed value returned by FW if the IB link is down. */ 595 if (props->state == IB_PORT_DOWN) 596 props->active_speed = IB_SPEED_SDR; 597 598 out: 599 kfree(in_mad); 600 kfree(out_mad); 601 return err; 602 } 603 604 static u8 state_to_phys_state(enum ib_port_state state) 605 { 606 return state == IB_PORT_ACTIVE ? 
5 : 3; 607 } 608 609 static int eth_link_query_port(struct ib_device *ibdev, u8 port, 610 struct ib_port_attr *props, int netw_view) 611 { 612 613 struct mlx4_ib_dev *mdev = to_mdev(ibdev); 614 struct mlx4_ib_iboe *iboe = &mdev->iboe; 615 struct net_device *ndev; 616 enum ib_mtu tmp; 617 struct mlx4_cmd_mailbox *mailbox; 618 int err = 0; 619 int is_bonded = mlx4_is_bonded(mdev->dev); 620 621 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); 622 if (IS_ERR(mailbox)) 623 return PTR_ERR(mailbox); 624 625 err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, 626 MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, 627 MLX4_CMD_WRAPPED); 628 if (err) 629 goto out; 630 631 props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? 632 IB_WIDTH_4X : IB_WIDTH_1X; 633 props->active_speed = IB_SPEED_QDR; 634 props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; 635 props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; 636 props->max_msg_sz = mdev->dev->caps.max_msg_sz; 637 props->pkey_tbl_len = 1; 638 props->max_mtu = IB_MTU_4096; 639 props->max_vl_num = 2; 640 props->state = IB_PORT_DOWN; 641 props->phys_state = state_to_phys_state(props->state); 642 props->active_mtu = IB_MTU_256; 643 spin_lock_bh(&iboe->lock); 644 ndev = iboe->netdevs[port - 1]; 645 if (ndev && is_bonded) { 646 rcu_read_lock(); /* required to get upper dev */ 647 ndev = netdev_master_upper_dev_get_rcu(ndev); 648 rcu_read_unlock(); 649 } 650 if (!ndev) 651 goto out_unlock; 652 653 tmp = iboe_get_mtu(ndev->mtu); 654 props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; 655 656 props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? 657 IB_PORT_ACTIVE : IB_PORT_DOWN; 658 props->phys_state = state_to_phys_state(props->state); 659 out_unlock: 660 spin_unlock_bh(&iboe->lock); 661 out: 662 mlx4_free_cmd_mailbox(mdev->dev, mailbox); 663 return err; 664 } 665 666 int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, 667 struct ib_port_attr *props, int netw_view) 668 { 669 int err; 670 671 memset(props, 0, sizeof *props); 672 673 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 
674 ib_link_query_port(ibdev, port, props, netw_view) : 675 eth_link_query_port(ibdev, port, props, netw_view); 676 677 return err; 678 } 679 680 static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, 681 struct ib_port_attr *props) 682 { 683 /* returns host view */ 684 return __mlx4_ib_query_port(ibdev, port, props, 0); 685 } 686 687 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 688 union ib_gid *gid, int netw_view) 689 { 690 struct ib_smp *in_mad = NULL; 691 struct ib_smp *out_mad = NULL; 692 int err = -ENOMEM; 693 struct mlx4_ib_dev *dev = to_mdev(ibdev); 694 int clear = 0; 695 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; 696 697 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 698 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 699 if (!in_mad || !out_mad) 700 goto out; 701 702 init_query_mad(in_mad); 703 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 704 in_mad->attr_mod = cpu_to_be32(port); 705 706 if (mlx4_is_mfunc(dev->dev) && netw_view) 707 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; 708 709 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad); 710 if (err) 711 goto out; 712 713 memcpy(gid->raw, out_mad->data + 8, 8); 714 715 if (mlx4_is_mfunc(dev->dev) && !netw_view) { 716 if (index) { 717 /* For any index > 0, return the null guid */ 718 err = 0; 719 clear = 1; 720 goto out; 721 } 722 } 723 724 init_query_mad(in_mad); 725 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; 726 in_mad->attr_mod = cpu_to_be32(index / 8); 727 728 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, 729 NULL, NULL, in_mad, out_mad); 730 if (err) 731 goto out; 732 733 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); 734 735 out: 736 if (clear) 737 memset(gid->raw + 8, 0, 8); 738 kfree(in_mad); 739 kfree(out_mad); 740 return err; 741 } 742 743 static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 744 union ib_gid *gid) 745 { 746 int ret; 747 748 if (rdma_protocol_ib(ibdev, port)) 749 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); 750 751 if (!rdma_protocol_roce(ibdev, port)) 752 return -ENODEV; 753 754 if (!rdma_cap_roce_gid_table(ibdev, port)) 755 return -ENODEV; 756 757 ret = ib_get_cached_gid(ibdev, port, index, gid); 758 if (ret == -EAGAIN) { 759 memcpy(gid, &zgid, sizeof(*gid)); 760 return 0; 761 } 762 763 return ret; 764 } 765 766 int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 767 u16 *pkey, int netw_view) 768 { 769 struct ib_smp *in_mad = NULL; 770 struct ib_smp *out_mad = NULL; 771 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; 772 int err = -ENOMEM; 773 774 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 775 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 776 if (!in_mad || !out_mad) 777 goto out; 778 779 init_query_mad(in_mad); 780 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; 781 in_mad->attr_mod = cpu_to_be32(index / 32); 782 783 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) 784 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; 785 786 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, 787 in_mad, out_mad); 788 if (err) 789 goto out; 790 791 *pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]); 792 793 out: 794 kfree(in_mad); 795 kfree(out_mad); 796 return err; 797 } 798 799 static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) 800 { 801 return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0); 802 } 803 804 static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, 805 struct ib_device_modify *props) 806 { 807 struct mlx4_cmd_mailbox *mailbox; 808 
unsigned long flags; 809 810 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) 811 return -EOPNOTSUPP; 812 813 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 814 return 0; 815 816 if (mlx4_is_slave(to_mdev(ibdev)->dev)) 817 return -EOPNOTSUPP; 818 819 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); 820 memcpy(ibdev->node_desc, props->node_desc, 64); 821 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); 822 823 /* 824 * If possible, pass node desc to FW, so it can generate 825 * a 144 trap. If cmd fails, just ignore. 826 */ 827 mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev); 828 if (IS_ERR(mailbox)) 829 return 0; 830 831 memcpy(mailbox->buf, props->node_desc, 64); 832 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, 833 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); 834 835 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); 836 837 return 0; 838 } 839 840 static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, 841 u32 cap_mask) 842 { 843 struct mlx4_cmd_mailbox *mailbox; 844 int err; 845 846 mailbox = mlx4_alloc_cmd_mailbox(dev->dev); 847 if (IS_ERR(mailbox)) 848 return PTR_ERR(mailbox); 849 850 if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { 851 *(u8 *) mailbox->buf = !!reset_qkey_viols << 6; 852 ((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask); 853 } else { 854 ((u8 *) mailbox->buf)[3] = !!reset_qkey_viols; 855 ((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask); 856 } 857 858 err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE, 859 MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, 860 MLX4_CMD_WRAPPED); 861 862 mlx4_free_cmd_mailbox(dev->dev, mailbox); 863 return err; 864 } 865 866 static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask, 867 struct ib_port_modify *props) 868 { 869 struct mlx4_ib_dev *mdev = to_mdev(ibdev); 870 u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH; 871 struct ib_port_attr attr; 872 u32 cap_mask; 873 int err; 874 875 /* return OK if this is RoCE. CM calls ib_modify_port() regardless 876 * of whether port link layer is ETH or IB. For ETH ports, qkey 877 * violations and port capabilities are not meaningful. 
878 */ 879 if (is_eth) 880 return 0; 881 882 mutex_lock(&mdev->cap_mask_mutex); 883 884 err = mlx4_ib_query_port(ibdev, port, &attr); 885 if (err) 886 goto out; 887 888 cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) & 889 ~props->clr_port_cap_mask; 890 891 err = mlx4_ib_SET_PORT(mdev, port, 892 !!(mask & IB_PORT_RESET_QKEY_CNTR), 893 cap_mask); 894 895 out: 896 mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); 897 return err; 898 } 899 900 static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev, 901 struct ib_udata *udata) 902 { 903 struct mlx4_ib_dev *dev = to_mdev(ibdev); 904 struct mlx4_ib_ucontext *context; 905 struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3; 906 struct mlx4_ib_alloc_ucontext_resp resp; 907 int err; 908 909 if (!dev->ib_active) 910 return ERR_PTR(-EAGAIN); 911 912 if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { 913 resp_v3.qp_tab_size = dev->dev->caps.num_qps; 914 resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; 915 resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; 916 } else { 917 resp.dev_caps = dev->dev->caps.userspace_caps; 918 resp.qp_tab_size = dev->dev->caps.num_qps; 919 resp.bf_reg_size = dev->dev->caps.bf_reg_size; 920 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page; 921 resp.cqe_size = dev->dev->caps.cqe_size; 922 } 923 924 context = kzalloc(sizeof(*context), GFP_KERNEL); 925 if (!context) 926 return ERR_PTR(-ENOMEM); 927 928 err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar); 929 if (err) { 930 kfree(context); 931 return ERR_PTR(err); 932 } 933 934 INIT_LIST_HEAD(&context->db_page_list); 935 mutex_init(&context->db_page_mutex); 936 937 if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) 938 err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3)); 939 else 940 err = ib_copy_to_udata(udata, &resp, sizeof(resp)); 941 942 if (err) { 943 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); 944 kfree(context); 945 return ERR_PTR(-EFAULT); 946 } 947 948 return &context->ibucontext; 949 } 950 951 static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) 952 { 953 struct mlx4_ib_ucontext *context = to_mucontext(ibcontext); 954 955 mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar); 956 kfree(context); 957 958 return 0; 959 } 960 961 static void mlx4_ib_vma_open(struct vm_area_struct *area) 962 { 963 /* vma_open is called when a new VMA is created on top of our VMA. 964 * This is done through either mremap flow or split_vma (usually due 965 * to mlock, madvise, munmap, etc.). We do not support a clone of the 966 * vma, as this VMA is strongly hardware related. Therefore we set the 967 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from 968 * calling us again and trying to do incorrect actions. We assume that 969 * the original vma size is exactly a single page that there will be no 970 * "splitting" operations on. 971 */ 972 area->vm_ops = NULL; 973 } 974 975 static void mlx4_ib_vma_close(struct vm_area_struct *area) 976 { 977 struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data; 978 979 /* It's guaranteed that all VMAs opened on a FD are closed before the 980 * file itself is closed, therefore no sync is needed with the regular 981 * closing flow. (e.g. mlx4_ib_dealloc_ucontext) However need a sync 982 * with accessing the vma as part of mlx4_ib_disassociate_ucontext. 983 * The close operation is usually called under mm->mmap_sem except when 984 * process is exiting. 
The exiting case is handled explicitly as part
	 * of mlx4_ib_disassociate_ucontext.
	 */
	mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
				area->vm_private_data;

	/* Set the vma context pointer to NULL in the mlx4_ib driver's private
	 * data to protect against a race condition in
	 * mlx4_ib_disassociate_ucontext().
	 */
	mlx4_ib_vma_priv_data->vma = NULL;
}

static const struct vm_operations_struct mlx4_ib_vm_ops = {
	.open = mlx4_ib_vma_open,
	.close = mlx4_ib_vma_close
};

static void mlx4_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
{
	int i;
	int ret = 0;
	struct vm_area_struct *vma;
	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
	struct task_struct *owning_process = NULL;
	struct mm_struct *owning_mm = NULL;

	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
	if (!owning_process)
		return;

	owning_mm = get_task_mm(owning_process);
	if (!owning_mm) {
		pr_info("no mm, disassociate ucontext is pending task termination\n");
		while (1) {
			/* make sure that the task is dead before returning; this
			 * prevents a rare case of module unload running in
			 * parallel with a call to mlx4_ib_vma_close.
			 */
			put_task_struct(owning_process);
			msleep(1);
			owning_process = get_pid_task(ibcontext->tgid,
						      PIDTYPE_PID);
			if (!owning_process ||
			    owning_process->state == TASK_DEAD) {
				pr_info("disassociate ucontext done, task was terminated\n");
				/* if the task was dead, we still need to release the task struct */
				if (owning_process)
					put_task_struct(owning_process);
				return;
			}
		}
	}

	/* need to protect from a race on closing the vma as part of
	 * mlx4_ib_vma_close().
	 */
	down_read(&owning_mm->mmap_sem);
	for (i = 0; i < HW_BAR_COUNT; i++) {
		vma = context->hw_bar_info[i].vma;
		if (!vma)
			continue;

		ret = zap_vma_ptes(context->hw_bar_info[i].vma,
				   context->hw_bar_info[i].vma->vm_start,
				   PAGE_SIZE);
		if (ret) {
			pr_err("Error: zap_vma_ptes failed for index=%d, ret=%d\n", i, ret);
			BUG_ON(1);
		}

		/* context going to be destroyed, should not access ops any more */
		context->hw_bar_info[i].vma->vm_ops = NULL;
	}

	up_read(&owning_mm->mmap_sem);
	mmput(owning_mm);
	put_task_struct(owning_process);
}

static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
				 struct mlx4_ib_vma_private_data *vma_private_data)
{
	vma_private_data->vma = vma;
	vma->vm_private_data = vma_private_data;
	vma->vm_ops = &mlx4_ib_vm_ops;
}

/* Userspace mmap offsets: page 0 maps the UAR doorbell page, page 1 the
 * blueflame registers (when bf_reg_size is non-zero) and page 3 the HCA
 * core clock page.
 */
static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct mlx4_ib_dev *dev = to_mdev(context->device);
	struct mlx4_ib_ucontext *mucontext = to_mucontext(context);

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;

	if (vma->vm_pgoff == 0) {
		/* We prevent double mmapping on the same context */
		if (mucontext->hw_bar_info[HW_BAR_DB].vma)
			return -EINVAL;

		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

		if (io_remap_pfn_range(vma, vma->vm_start,
				       to_mucontext(context)->uar.pfn,
				       PAGE_SIZE, vma->vm_page_prot))
			return -EAGAIN;

		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);

	} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
		/* We prevent double mmapping on the same context */
		if (mucontext->hw_bar_info[HW_BAR_BF].vma)
			return -EINVAL;

		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

		if (io_remap_pfn_range(vma, vma->vm_start,
				       to_mucontext(context)->uar.pfn +
				       dev->dev->caps.num_uars,
				       PAGE_SIZE, vma->vm_page_prot))
			return -EAGAIN;

		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);

	} else if (vma->vm_pgoff == 3) {
		struct mlx4_clock_params params;
		int ret;

		/* We prevent double mmapping on the same context */
		if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
			return -EINVAL;

		ret = mlx4_get_internal_clock_params(dev->dev, &params);

		if (ret)
			return ret;

		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
		if (io_remap_pfn_range(vma, vma->vm_start,
				       (pci_resource_start(dev->dev->persist->pdev,
							   params.bar) +
					params.offset)
				       >> PAGE_SHIFT,
				       PAGE_SIZE, vma->vm_page_prot))
			return -EAGAIN;

		mlx4_ib_set_vma_data(vma,
				     &mucontext->hw_bar_info[HW_BAR_CLOCK]);
	} else {
		return -EINVAL;
	}

	return 0;
}

static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
				      struct ib_ucontext *context,
				      struct ib_udata *udata)
{
	struct mlx4_ib_pd *pd;
	int err;

	pd = kmalloc(sizeof *pd, GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
	if (err) {
		kfree(pd);
		return ERR_PTR(err);
	}

	if (context)
		if (ib_copy_to_udata(udata, &pd->pdn, sizeof(__u32))) {
			mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
			kfree(pd);
			return ERR_PTR(-EFAULT);
		}

	return &pd->ibpd;
}

static int
mlx4_ib_dealloc_pd(struct ib_pd *pd) 1167 { 1168 mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn); 1169 kfree(pd); 1170 1171 return 0; 1172 } 1173 1174 static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, 1175 struct ib_ucontext *context, 1176 struct ib_udata *udata) 1177 { 1178 struct mlx4_ib_xrcd *xrcd; 1179 struct ib_cq_init_attr cq_attr = {}; 1180 int err; 1181 1182 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)) 1183 return ERR_PTR(-ENOSYS); 1184 1185 xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL); 1186 if (!xrcd) 1187 return ERR_PTR(-ENOMEM); 1188 1189 err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn); 1190 if (err) 1191 goto err1; 1192 1193 xrcd->pd = ib_alloc_pd(ibdev); 1194 if (IS_ERR(xrcd->pd)) { 1195 err = PTR_ERR(xrcd->pd); 1196 goto err2; 1197 } 1198 1199 cq_attr.cqe = 1; 1200 xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr); 1201 if (IS_ERR(xrcd->cq)) { 1202 err = PTR_ERR(xrcd->cq); 1203 goto err3; 1204 } 1205 1206 return &xrcd->ibxrcd; 1207 1208 err3: 1209 ib_dealloc_pd(xrcd->pd); 1210 err2: 1211 mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn); 1212 err1: 1213 kfree(xrcd); 1214 return ERR_PTR(err); 1215 } 1216 1217 static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd) 1218 { 1219 ib_destroy_cq(to_mxrcd(xrcd)->cq); 1220 ib_dealloc_pd(to_mxrcd(xrcd)->pd); 1221 mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn); 1222 kfree(xrcd); 1223 1224 return 0; 1225 } 1226 1227 static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) 1228 { 1229 struct mlx4_ib_qp *mqp = to_mqp(ibqp); 1230 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); 1231 struct mlx4_ib_gid_entry *ge; 1232 1233 ge = kzalloc(sizeof *ge, GFP_KERNEL); 1234 if (!ge) 1235 return -ENOMEM; 1236 1237 ge->gid = *gid; 1238 if (mlx4_ib_add_mc(mdev, mqp, gid)) { 1239 ge->port = mqp->port; 1240 ge->added = 1; 1241 } 1242 1243 mutex_lock(&mqp->mutex); 1244 list_add_tail(&ge->list, &mqp->gid_list); 1245 mutex_unlock(&mqp->mutex); 1246 1247 return 0; 1248 } 1249 1250 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 1251 union ib_gid *gid) 1252 { 1253 struct net_device *ndev; 1254 int ret = 0; 1255 1256 if (!mqp->port) 1257 return 0; 1258 1259 spin_lock_bh(&mdev->iboe.lock); 1260 ndev = mdev->iboe.netdevs[mqp->port - 1]; 1261 if (ndev) 1262 dev_hold(ndev); 1263 spin_unlock_bh(&mdev->iboe.lock); 1264 1265 if (ndev) { 1266 ret = 1; 1267 dev_put(ndev); 1268 } 1269 1270 return ret; 1271 } 1272 1273 struct mlx4_ib_steering { 1274 struct list_head list; 1275 struct mlx4_flow_reg_id reg_id; 1276 union ib_gid gid; 1277 }; 1278 1279 static int parse_flow_attr(struct mlx4_dev *dev, 1280 u32 qp_num, 1281 union ib_flow_spec *ib_spec, 1282 struct _rule_hw *mlx4_spec) 1283 { 1284 enum mlx4_net_trans_rule_id type; 1285 1286 switch (ib_spec->type) { 1287 case IB_FLOW_SPEC_ETH: 1288 type = MLX4_NET_TRANS_RULE_ID_ETH; 1289 memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, 1290 ETH_ALEN); 1291 memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac, 1292 ETH_ALEN); 1293 mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag; 1294 mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; 1295 break; 1296 case IB_FLOW_SPEC_IB: 1297 type = MLX4_NET_TRANS_RULE_ID_IB; 1298 mlx4_spec->ib.l3_qpn = 1299 cpu_to_be32(qp_num); 1300 mlx4_spec->ib.qpn_mask = 1301 cpu_to_be32(MLX4_IB_FLOW_QPN_MASK); 1302 break; 1303 1304 1305 case IB_FLOW_SPEC_IPV4: 1306 type = MLX4_NET_TRANS_RULE_ID_IPV4; 1307 mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; 1308 
mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; 1309 mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip; 1310 mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip; 1311 break; 1312 1313 case IB_FLOW_SPEC_TCP: 1314 case IB_FLOW_SPEC_UDP: 1315 type = ib_spec->type == IB_FLOW_SPEC_TCP ? 1316 MLX4_NET_TRANS_RULE_ID_TCP : 1317 MLX4_NET_TRANS_RULE_ID_UDP; 1318 mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port; 1319 mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port; 1320 mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port; 1321 mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port; 1322 break; 1323 1324 default: 1325 return -EINVAL; 1326 } 1327 if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 || 1328 mlx4_hw_rule_sz(dev, type) < 0) 1329 return -EINVAL; 1330 mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type)); 1331 mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2; 1332 return mlx4_hw_rule_sz(dev, type); 1333 } 1334 1335 struct default_rules { 1336 __u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; 1337 __u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS]; 1338 __u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS]; 1339 __u8 link_layer; 1340 }; 1341 static const struct default_rules default_table[] = { 1342 { 1343 .mandatory_fields = {IB_FLOW_SPEC_IPV4}, 1344 .mandatory_not_fields = {IB_FLOW_SPEC_ETH}, 1345 .rules_create_list = {IB_FLOW_SPEC_IB}, 1346 .link_layer = IB_LINK_LAYER_INFINIBAND 1347 } 1348 }; 1349 1350 static int __mlx4_ib_default_rules_match(struct ib_qp *qp, 1351 struct ib_flow_attr *flow_attr) 1352 { 1353 int i, j, k; 1354 void *ib_flow; 1355 const struct default_rules *pdefault_rules = default_table; 1356 u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); 1357 1358 for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) { 1359 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; 1360 memset(&field_types, 0, sizeof(field_types)); 1361 1362 if (link_layer != pdefault_rules->link_layer) 1363 continue; 1364 1365 ib_flow = flow_attr + 1; 1366 /* we assume the specs are sorted */ 1367 for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS && 1368 j < flow_attr->num_of_specs; k++) { 1369 union ib_flow_spec *current_flow = 1370 (union ib_flow_spec *)ib_flow; 1371 1372 /* same layer but different type */ 1373 if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) == 1374 (pdefault_rules->mandatory_fields[k] & 1375 IB_FLOW_SPEC_LAYER_MASK)) && 1376 (current_flow->type != 1377 pdefault_rules->mandatory_fields[k])) 1378 goto out; 1379 1380 /* same layer, try match next one */ 1381 if (current_flow->type == 1382 pdefault_rules->mandatory_fields[k]) { 1383 j++; 1384 ib_flow += 1385 ((union ib_flow_spec *)ib_flow)->size; 1386 } 1387 } 1388 1389 ib_flow = flow_attr + 1; 1390 for (j = 0; j < flow_attr->num_of_specs; 1391 j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size) 1392 for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++) 1393 /* same layer and same type */ 1394 if (((union ib_flow_spec *)ib_flow)->type == 1395 pdefault_rules->mandatory_not_fields[k]) 1396 goto out; 1397 1398 return i; 1399 } 1400 out: 1401 return -1; 1402 } 1403 1404 static int __mlx4_ib_create_default_rules( 1405 struct mlx4_ib_dev *mdev, 1406 struct ib_qp *qp, 1407 const struct default_rules *pdefault_rules, 1408 struct _rule_hw *mlx4_spec) { 1409 int size = 0; 1410 int i; 1411 1412 for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) { 1413 int ret; 1414 union ib_flow_spec ib_spec; 1415 switch 
(pdefault_rules->rules_create_list[i]) {
		case 0:
			/* no rule */
			continue;
		case IB_FLOW_SPEC_IB:
			ib_spec.type = IB_FLOW_SPEC_IB;
			ib_spec.size = sizeof(struct ib_flow_spec_ib);

			break;
		default:
			/* invalid rule */
			return -EINVAL;
		}
		/* We must put an empty rule, the qpn is being ignored */
		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
				      mlx4_spec);
		if (ret < 0) {
			pr_info("invalid parsing\n");
			return -EINVAL;
		}

		mlx4_spec = (void *)mlx4_spec + ret;
		size += ret;
	}
	return size;
}

/* Build the flow steering rule in a command mailbox (control segment plus
 * default and user-supplied specs) and attach it to the QP in firmware;
 * the firmware registration handle is returned in *reg_id.
 */
static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
				 int domain,
				 enum mlx4_net_trans_promisc_mode flow_type,
				 u64 *reg_id)
{
	int ret, i;
	int size = 0;
	void *ib_flow;
	struct mlx4_ib_dev *mdev = to_mdev(qp->device);
	struct mlx4_cmd_mailbox *mailbox;
	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
	int default_flow;

	static const u16 __mlx4_domain[] = {
		[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
		[IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
		[IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
		[IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
	};

	if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
		pr_err("Invalid priority value %d\n", flow_attr->priority);
		return -EINVAL;
	}

	if (domain >= IB_FLOW_DOMAIN_NUM) {
		pr_err("Invalid domain value %d\n", domain);
		return -EINVAL;
	}

	if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
		return -EINVAL;

	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);
	ctrl = mailbox->buf;

	ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
				 flow_attr->priority);
	ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
	ctrl->port = flow_attr->port;
	ctrl->qpn = cpu_to_be32(qp->qp_num);

	ib_flow = flow_attr + 1;
	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
	/* Add default flows */
	default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
	if (default_flow >= 0) {
		ret = __mlx4_ib_create_default_rules(
				mdev, qp, default_table + default_flow,
				mailbox->buf + size);
		if (ret < 0) {
			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
			return -EINVAL;
		}
		size += ret;
	}
	for (i = 0; i < flow_attr->num_of_specs; i++) {
		ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
				      mailbox->buf + size);
		if (ret < 0) {
			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
			return -EINVAL;
		}
		ib_flow += ((union ib_flow_spec *) ib_flow)->size;
		size += ret;
	}

	ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
			   MLX4_CMD_WRAPPED);
	if (ret == -ENOMEM)
		pr_err("mcg table is full. Failed to register network rule.\n");
	else if (ret == -ENXIO)
		pr_err("Device managed flow steering is disabled. Failed to register network rule.\n");
	else if (ret)
		pr_err("Invalid argument. Failed to register network rule.\n");

	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
	return ret;
}

static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
{
	int err;

	err = mlx4_cmd(dev, reg_id, 0, 0,
		       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
		       MLX4_CMD_WRAPPED);
	if (err)
		pr_err("Failed to detach network rule, registration id = 0x%llx\n",
		       reg_id);
	return err;
}

static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
				    u64 *reg_id)
{
	void *ib_flow;
	union ib_flow_spec *ib_spec;
	struct mlx4_dev *dev = to_mdev(qp->device)->dev;
	int err = 0;

	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
	    dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
		return 0; /* do nothing */

	ib_flow = flow_attr + 1;
	ib_spec = (union ib_flow_spec *)ib_flow;

	if (ib_spec->type != IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
		return 0; /* do nothing */

	err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
				    flow_attr->port, qp->qp_num,
				    MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
				    reg_id);
	return err;
}

static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
					   struct ib_flow_attr *flow_attr,
					   int domain)
{
	int err = 0, i = 0, j = 0;
	struct mlx4_ib_flow *mflow;
	enum mlx4_net_trans_promisc_mode type[2];
	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
	int is_bonded = mlx4_is_bonded(dev);

	memset(type, 0, sizeof(type));

	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
	if (!mflow) {
		err = -ENOMEM;
		goto err_free;
	}

	switch (flow_attr->type) {
	case IB_FLOW_ATTR_NORMAL:
		type[0] = MLX4_FS_REGULAR;
		break;

	case IB_FLOW_ATTR_ALL_DEFAULT:
		type[0] = MLX4_FS_ALL_DEFAULT;
		break;

	case IB_FLOW_ATTR_MC_DEFAULT:
		type[0] = MLX4_FS_MC_DEFAULT;
		break;

	case IB_FLOW_ATTR_SNIFFER:
		type[0] = MLX4_FS_UC_SNIFFER;
		type[1] = MLX4_FS_MC_SNIFFER;
		break;

	default:
		err = -EINVAL;
		goto err_free;
	}

	while (i < ARRAY_SIZE(type) && type[i]) {
		err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
					    &mflow->reg_id[i].id);
		if (err)
			goto err_create_flow;
		if (is_bonded) {
			/* Application always sees one port so the mirror rule
			 * must be on port #2
			 */
			flow_attr->port = 2;
			err = __mlx4_ib_create_flow(qp, flow_attr,
						    domain, type[j],
						    &mflow->reg_id[j].mirror);
			flow_attr->port = 1;
			if (err)
				goto err_create_flow;
			j++;
		}

		i++;
	}

	if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
		err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
					       &mflow->reg_id[i].id);
		if (err)
			goto err_create_flow;

		if (is_bonded) {
			flow_attr->port = 2;
			err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
						       &mflow->reg_id[j].mirror);
			flow_attr->port = 1;
			if (err)
				goto err_create_flow;
			j++;
		}
		/* function to create mirror rule */
		i++;
	}

	return &mflow->ibflow;

err_create_flow:
	while (i) {
		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
					     mflow->reg_id[i].id);
		i--;
	}

	while (j) {
		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
					     mflow->reg_id[j].mirror);
		j--;
	}
err_free:
	kfree(mflow);
	return ERR_PTR(err);
}

static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
{
	int err, ret = 0;
	int i = 0;
	struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
	struct mlx4_ib_flow *mflow = to_mflow(flow_id);

	while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
		err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
		if (err)
			ret = err;
		if (mflow->reg_id[i].mirror) {
			err = __mlx4_ib_destroy_flow(mdev->dev,
						     mflow->reg_id[i].mirror);
			if (err)
				ret = err;
		}
		i++;
	}

	kfree(mflow);
	return ret;
}

static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	int err;
	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
	struct mlx4_dev *dev = mdev->dev;
	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
	struct mlx4_ib_steering *ib_steering = NULL;
	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
	struct mlx4_flow_reg_id reg_id;

	if (mdev->dev->caps.steering_mode ==
	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
		ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
		if (!ib_steering)
			return -ENOMEM;
	}

	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
				    !!(mqp->flags &
				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
				    prot, &reg_id.id);
	if (err) {
		pr_err("multicast attach op failed, err %d\n", err);
		goto err_malloc;
	}

	reg_id.mirror = 0;
	if (mlx4_is_bonded(dev)) {
		err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
					    (mqp->port == 1) ? 2 : 1,
					    !!(mqp->flags &
					       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
					    prot, &reg_id.mirror);
		if (err)
			goto err_add;
	}

	err = add_gid_entry(ibqp, gid);
	if (err)
		goto err_add;

	if (ib_steering) {
		memcpy(ib_steering->gid.raw, gid->raw, 16);
		ib_steering->reg_id = reg_id;
		mutex_lock(&mqp->mutex);
		list_add(&ib_steering->list, &mqp->steering_rules);
		mutex_unlock(&mqp->mutex);
	}
	return 0;

err_add:
	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
			      prot, reg_id.id);
	if (reg_id.mirror)
		mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
				      prot, reg_id.mirror);
err_malloc:
	kfree(ib_steering);

	return err;
}

static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
{
	struct mlx4_ib_gid_entry *ge;
	struct mlx4_ib_gid_entry *tmp;
	struct mlx4_ib_gid_entry *ret = NULL;

	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
		if (!memcmp(raw, ge->gid.raw, 16)) {
			ret = ge;
			break;
		}
	}

	return ret;
}

static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
	int err;
	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
	struct mlx4_dev *dev = mdev->dev;
	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
	struct net_device *ndev;
	struct mlx4_ib_gid_entry *ge;
	struct mlx4_flow_reg_id reg_id = {0, 0};
	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;

	if (mdev->dev->caps.steering_mode ==
	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
		struct mlx4_ib_steering *ib_steering;

		mutex_lock(&mqp->mutex);
		list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
			if (!memcmp(ib_steering->gid.raw,
gid->raw, 16)) { 1783 list_del(&ib_steering->list); 1784 break; 1785 } 1786 } 1787 mutex_unlock(&mqp->mutex); 1788 if (&ib_steering->list == &mqp->steering_rules) { 1789 pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n"); 1790 return -EINVAL; 1791 } 1792 reg_id = ib_steering->reg_id; 1793 kfree(ib_steering); 1794 } 1795 1796 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, 1797 prot, reg_id.id); 1798 if (err) 1799 return err; 1800 1801 if (mlx4_is_bonded(dev)) { 1802 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, 1803 prot, reg_id.mirror); 1804 if (err) 1805 return err; 1806 } 1807 1808 mutex_lock(&mqp->mutex); 1809 ge = find_gid_entry(mqp, gid->raw); 1810 if (ge) { 1811 spin_lock_bh(&mdev->iboe.lock); 1812 ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; 1813 if (ndev) 1814 dev_hold(ndev); 1815 spin_unlock_bh(&mdev->iboe.lock); 1816 if (ndev) 1817 dev_put(ndev); 1818 list_del(&ge->list); 1819 kfree(ge); 1820 } else 1821 pr_warn("could not find mgid entry\n"); 1822 1823 mutex_unlock(&mqp->mutex); 1824 1825 return 0; 1826 } 1827 1828 static int init_node_data(struct mlx4_ib_dev *dev) 1829 { 1830 struct ib_smp *in_mad = NULL; 1831 struct ib_smp *out_mad = NULL; 1832 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; 1833 int err = -ENOMEM; 1834 1835 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 1836 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 1837 if (!in_mad || !out_mad) 1838 goto out; 1839 1840 init_query_mad(in_mad); 1841 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; 1842 if (mlx4_is_master(dev->dev)) 1843 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; 1844 1845 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); 1846 if (err) 1847 goto out; 1848 1849 memcpy(dev->ib_dev.node_desc, out_mad->data, 64); 1850 1851 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 1852 1853 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); 1854 if (err) 1855 goto out; 1856 1857 dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32)); 1858 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); 1859 1860 out: 1861 kfree(in_mad); 1862 kfree(out_mad); 1863 return err; 1864 } 1865 1866 static ssize_t show_hca(struct device *device, struct device_attribute *attr, 1867 char *buf) 1868 { 1869 struct mlx4_ib_dev *dev = 1870 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 1871 return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); 1872 } 1873 1874 static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, 1875 char *buf) 1876 { 1877 struct mlx4_ib_dev *dev = 1878 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 1879 return sprintf(buf, "%d.%d.%d\n", (int) (dev->dev->caps.fw_ver >> 32), 1880 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff, 1881 (int) dev->dev->caps.fw_ver & 0xffff); 1882 } 1883 1884 static ssize_t show_rev(struct device *device, struct device_attribute *attr, 1885 char *buf) 1886 { 1887 struct mlx4_ib_dev *dev = 1888 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 1889 return sprintf(buf, "%x\n", dev->dev->rev_id); 1890 } 1891 1892 static ssize_t show_board(struct device *device, struct device_attribute *attr, 1893 char *buf) 1894 { 1895 struct mlx4_ib_dev *dev = 1896 container_of(device, struct mlx4_ib_dev, ib_dev.dev); 1897 return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN, 1898 dev->dev->board_id); 1899 } 1900 1901 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 1902 static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); 1903 static DEVICE_ATTR(hca_type, S_IRUGO, 
show_hca, NULL); 1904 static DEVICE_ATTR(board_id, S_IRUGO, show_board, NULL); 1905 1906 static struct device_attribute *mlx4_class_attributes[] = { 1907 &dev_attr_hw_rev, 1908 &dev_attr_fw_ver, 1909 &dev_attr_hca_type, 1910 &dev_attr_board_id 1911 }; 1912 1913 #define MLX4_IB_INVALID_MAC ((u64)-1) 1914 static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev, 1915 struct net_device *dev, 1916 int port) 1917 { 1918 u64 new_smac = 0; 1919 u64 release_mac = MLX4_IB_INVALID_MAC; 1920 struct mlx4_ib_qp *qp; 1921 1922 read_lock(&dev_base_lock); 1923 new_smac = mlx4_mac_to_u64(dev->dev_addr); 1924 read_unlock(&dev_base_lock); 1925 1926 atomic64_set(&ibdev->iboe.mac[port - 1], new_smac); 1927 1928 /* no need for update QP1 and mac registration in non-SRIOV */ 1929 if (!mlx4_is_mfunc(ibdev->dev)) 1930 return; 1931 1932 mutex_lock(&ibdev->qp1_proxy_lock[port - 1]); 1933 qp = ibdev->qp1_proxy[port - 1]; 1934 if (qp) { 1935 int new_smac_index; 1936 u64 old_smac; 1937 struct mlx4_update_qp_params update_params; 1938 1939 mutex_lock(&qp->mutex); 1940 old_smac = qp->pri.smac; 1941 if (new_smac == old_smac) 1942 goto unlock; 1943 1944 new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac); 1945 1946 if (new_smac_index < 0) 1947 goto unlock; 1948 1949 update_params.smac_index = new_smac_index; 1950 if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC, 1951 &update_params)) { 1952 release_mac = new_smac; 1953 goto unlock; 1954 } 1955 /* if old port was zero, no mac was yet registered for this QP */ 1956 if (qp->pri.smac_port) 1957 release_mac = old_smac; 1958 qp->pri.smac = new_smac; 1959 qp->pri.smac_port = port; 1960 qp->pri.smac_index = new_smac_index; 1961 } 1962 1963 unlock: 1964 if (release_mac != MLX4_IB_INVALID_MAC) 1965 mlx4_unregister_mac(ibdev->dev, port, release_mac); 1966 if (qp) 1967 mutex_unlock(&qp->mutex); 1968 mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]); 1969 } 1970 1971 static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev, 1972 struct net_device *dev, 1973 unsigned long event) 1974 1975 { 1976 struct mlx4_ib_iboe *iboe; 1977 int update_qps_port = -1; 1978 int port; 1979 1980 ASSERT_RTNL(); 1981 1982 iboe = &ibdev->iboe; 1983 1984 spin_lock_bh(&iboe->lock); 1985 mlx4_foreach_ib_transport_port(port, ibdev->dev) { 1986 1987 iboe->netdevs[port - 1] = 1988 mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port); 1989 1990 if (dev == iboe->netdevs[port - 1] && 1991 (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER || 1992 event == NETDEV_UP || event == NETDEV_CHANGE)) 1993 update_qps_port = port; 1994 1995 } 1996 spin_unlock_bh(&iboe->lock); 1997 1998 if (update_qps_port > 0) 1999 mlx4_ib_update_qps(ibdev, dev, update_qps_port); 2000 } 2001 2002 static int mlx4_ib_netdev_event(struct notifier_block *this, 2003 unsigned long event, void *ptr) 2004 { 2005 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 2006 struct mlx4_ib_dev *ibdev; 2007 2008 if (!net_eq(dev_net(dev), &init_net)) 2009 return NOTIFY_DONE; 2010 2011 ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb); 2012 mlx4_ib_scan_netdevs(ibdev, dev, event); 2013 2014 return NOTIFY_DONE; 2015 } 2016 2017 static void init_pkeys(struct mlx4_ib_dev *ibdev) 2018 { 2019 int port; 2020 int slave; 2021 int i; 2022 2023 if (mlx4_is_master(ibdev->dev)) { 2024 for (slave = 0; slave <= ibdev->dev->persist->num_vfs; 2025 ++slave) { 2026 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { 2027 for (i = 0; 2028 i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; 2029 ++i) { 2030 
ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = 2031 /* master has the identity virt2phys pkey mapping */ 2032 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : 2033 ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; 2034 mlx4_sync_pkey_table(ibdev->dev, slave, port, i, 2035 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); 2036 } 2037 } 2038 } 2039 /* initialize pkey cache */ 2040 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { 2041 for (i = 0; 2042 i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; 2043 ++i) 2044 ibdev->pkeys.phys_pkey_cache[port-1][i] = 2045 (i) ? 0 : 0xFFFF; 2046 } 2047 } 2048 } 2049 2050 static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) 2051 { 2052 int i, j, eq = 0, total_eqs = 0; 2053 2054 ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors, 2055 sizeof(ibdev->eq_table[0]), GFP_KERNEL); 2056 if (!ibdev->eq_table) 2057 return; 2058 2059 for (i = 1; i <= dev->caps.num_ports; i++) { 2060 for (j = 0; j < mlx4_get_eqs_per_port(dev, i); 2061 j++, total_eqs++) { 2062 if (i > 1 && mlx4_is_eq_shared(dev, total_eqs)) 2063 continue; 2064 ibdev->eq_table[eq] = total_eqs; 2065 if (!mlx4_assign_eq(dev, i, 2066 &ibdev->eq_table[eq])) 2067 eq++; 2068 else 2069 ibdev->eq_table[eq] = -1; 2070 } 2071 } 2072 2073 for (i = eq; i < dev->caps.num_comp_vectors; 2074 ibdev->eq_table[i++] = -1) 2075 ; 2076 2077 /* Advertise the new number of EQs to clients */ 2078 ibdev->ib_dev.num_comp_vectors = eq; 2079 } 2080 2081 static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) 2082 { 2083 int i; 2084 int total_eqs = ibdev->ib_dev.num_comp_vectors; 2085 2086 /* no eqs were allocated */ 2087 if (!ibdev->eq_table) 2088 return; 2089 2090 /* Reset the advertised EQ number */ 2091 ibdev->ib_dev.num_comp_vectors = 0; 2092 2093 for (i = 0; i < total_eqs; i++) 2094 mlx4_release_eq(dev, ibdev->eq_table[i]); 2095 2096 kfree(ibdev->eq_table); 2097 ibdev->eq_table = NULL; 2098 } 2099 2100 static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num, 2101 struct ib_port_immutable *immutable) 2102 { 2103 struct ib_port_attr attr; 2104 int err; 2105 2106 err = mlx4_ib_query_port(ibdev, port_num, &attr); 2107 if (err) 2108 return err; 2109 2110 immutable->pkey_tbl_len = attr.pkey_tbl_len; 2111 immutable->gid_tbl_len = attr.gid_tbl_len; 2112 2113 if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) 2114 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB; 2115 else 2116 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; 2117 2118 immutable->max_mad_size = IB_MGMT_MAD_SIZE; 2119 2120 return 0; 2121 } 2122 2123 static void *mlx4_ib_add(struct mlx4_dev *dev) 2124 { 2125 struct mlx4_ib_dev *ibdev; 2126 int num_ports = 0; 2127 int i, j; 2128 int err; 2129 struct mlx4_ib_iboe *iboe; 2130 int ib_num_ports = 0; 2131 int num_req_counters; 2132 int allocated; 2133 u32 counter_index; 2134 2135 pr_info_once("%s", mlx4_ib_version); 2136 2137 num_ports = 0; 2138 mlx4_foreach_ib_transport_port(i, dev) 2139 num_ports++; 2140 2141 /* No point in registering a device with no ports... 
*/ 2142 if (num_ports == 0) 2143 return NULL; 2144 2145 ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); 2146 if (!ibdev) { 2147 dev_err(&dev->persist->pdev->dev, 2148 "Device struct alloc failed\n"); 2149 return NULL; 2150 } 2151 2152 iboe = &ibdev->iboe; 2153 2154 if (mlx4_pd_alloc(dev, &ibdev->priv_pdn)) 2155 goto err_dealloc; 2156 2157 if (mlx4_uar_alloc(dev, &ibdev->priv_uar)) 2158 goto err_pd; 2159 2160 ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT, 2161 PAGE_SIZE); 2162 if (!ibdev->uar_map) 2163 goto err_uar; 2164 MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock); 2165 2166 ibdev->dev = dev; 2167 ibdev->bond_next_port = 0; 2168 2169 strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX); 2170 ibdev->ib_dev.owner = THIS_MODULE; 2171 ibdev->ib_dev.node_type = RDMA_NODE_IB_CA; 2172 ibdev->ib_dev.local_dma_lkey = dev->caps.reserved_lkey; 2173 ibdev->num_ports = num_ports; 2174 ibdev->ib_dev.phys_port_cnt = mlx4_is_bonded(dev) ? 2175 1 : ibdev->num_ports; 2176 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; 2177 ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; 2178 ibdev->ib_dev.get_netdev = mlx4_ib_get_netdev; 2179 ibdev->ib_dev.add_gid = mlx4_ib_add_gid; 2180 ibdev->ib_dev.del_gid = mlx4_ib_del_gid; 2181 2182 if (dev->caps.userspace_caps) 2183 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; 2184 else 2185 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION; 2186 2187 ibdev->ib_dev.uverbs_cmd_mask = 2188 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 2189 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 2190 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | 2191 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 2192 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 2193 (1ull << IB_USER_VERBS_CMD_REG_MR) | 2194 (1ull << IB_USER_VERBS_CMD_REREG_MR) | 2195 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 2196 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 2197 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 2198 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) | 2199 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) | 2200 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 2201 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 2202 (1ull << IB_USER_VERBS_CMD_QUERY_QP) | 2203 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 2204 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) | 2205 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) | 2206 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 2207 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 2208 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 2209 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 2210 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) | 2211 (1ull << IB_USER_VERBS_CMD_OPEN_QP); 2212 2213 ibdev->ib_dev.query_device = mlx4_ib_query_device; 2214 ibdev->ib_dev.query_port = mlx4_ib_query_port; 2215 ibdev->ib_dev.get_link_layer = mlx4_ib_port_link_layer; 2216 ibdev->ib_dev.query_gid = mlx4_ib_query_gid; 2217 ibdev->ib_dev.query_pkey = mlx4_ib_query_pkey; 2218 ibdev->ib_dev.modify_device = mlx4_ib_modify_device; 2219 ibdev->ib_dev.modify_port = mlx4_ib_modify_port; 2220 ibdev->ib_dev.alloc_ucontext = mlx4_ib_alloc_ucontext; 2221 ibdev->ib_dev.dealloc_ucontext = mlx4_ib_dealloc_ucontext; 2222 ibdev->ib_dev.mmap = mlx4_ib_mmap; 2223 ibdev->ib_dev.alloc_pd = mlx4_ib_alloc_pd; 2224 ibdev->ib_dev.dealloc_pd = mlx4_ib_dealloc_pd; 2225 ibdev->ib_dev.create_ah = mlx4_ib_create_ah; 2226 ibdev->ib_dev.query_ah = mlx4_ib_query_ah; 2227 ibdev->ib_dev.destroy_ah = mlx4_ib_destroy_ah; 2228 ibdev->ib_dev.create_srq = mlx4_ib_create_srq; 2229 ibdev->ib_dev.modify_srq = mlx4_ib_modify_srq; 2230 ibdev->ib_dev.query_srq = 
mlx4_ib_query_srq; 2231 ibdev->ib_dev.destroy_srq = mlx4_ib_destroy_srq; 2232 ibdev->ib_dev.post_srq_recv = mlx4_ib_post_srq_recv; 2233 ibdev->ib_dev.create_qp = mlx4_ib_create_qp; 2234 ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp; 2235 ibdev->ib_dev.query_qp = mlx4_ib_query_qp; 2236 ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp; 2237 ibdev->ib_dev.post_send = mlx4_ib_post_send; 2238 ibdev->ib_dev.post_recv = mlx4_ib_post_recv; 2239 ibdev->ib_dev.create_cq = mlx4_ib_create_cq; 2240 ibdev->ib_dev.modify_cq = mlx4_ib_modify_cq; 2241 ibdev->ib_dev.resize_cq = mlx4_ib_resize_cq; 2242 ibdev->ib_dev.destroy_cq = mlx4_ib_destroy_cq; 2243 ibdev->ib_dev.poll_cq = mlx4_ib_poll_cq; 2244 ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; 2245 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; 2246 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; 2247 ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; 2248 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; 2249 ibdev->ib_dev.alloc_mr = mlx4_ib_alloc_mr; 2250 ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; 2251 ibdev->ib_dev.free_fast_reg_page_list = mlx4_ib_free_fast_reg_page_list; 2252 ibdev->ib_dev.attach_mcast = mlx4_ib_mcg_attach; 2253 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 2254 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 2255 ibdev->ib_dev.get_port_immutable = mlx4_port_immutable; 2256 ibdev->ib_dev.disassociate_ucontext = mlx4_ib_disassociate_ucontext; 2257 2258 if (!mlx4_is_slave(ibdev->dev)) { 2259 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; 2260 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; 2261 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; 2262 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc; 2263 } 2264 2265 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW || 2266 dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) { 2267 ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw; 2268 ibdev->ib_dev.bind_mw = mlx4_ib_bind_mw; 2269 ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw; 2270 2271 ibdev->ib_dev.uverbs_cmd_mask |= 2272 (1ull << IB_USER_VERBS_CMD_ALLOC_MW) | 2273 (1ull << IB_USER_VERBS_CMD_DEALLOC_MW); 2274 } 2275 2276 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) { 2277 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd; 2278 ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd; 2279 ibdev->ib_dev.uverbs_cmd_mask |= 2280 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) | 2281 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD); 2282 } 2283 2284 if (check_flow_steering_support(dev)) { 2285 ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED; 2286 ibdev->ib_dev.create_flow = mlx4_ib_create_flow; 2287 ibdev->ib_dev.destroy_flow = mlx4_ib_destroy_flow; 2288 2289 ibdev->ib_dev.uverbs_ex_cmd_mask |= 2290 (1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) | 2291 (1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW); 2292 } 2293 2294 ibdev->ib_dev.uverbs_ex_cmd_mask |= 2295 (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | 2296 (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ); 2297 2298 mlx4_ib_alloc_eqs(dev, ibdev); 2299 2300 spin_lock_init(&iboe->lock); 2301 2302 if (init_node_data(ibdev)) 2303 goto err_map; 2304 2305 num_req_counters = mlx4_is_bonded(dev) ? 
1 : ibdev->num_ports; 2306 for (i = 0; i < num_req_counters; ++i) { 2307 mutex_init(&ibdev->qp1_proxy_lock[i]); 2308 allocated = 0; 2309 if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) == 2310 IB_LINK_LAYER_ETHERNET) { 2311 err = mlx4_counter_alloc(ibdev->dev, &counter_index); 2312 /* if failed to allocate a new counter, use default */ 2313 if (err) 2314 counter_index = 2315 mlx4_get_default_counter_index(dev, 2316 i + 1); 2317 else 2318 allocated = 1; 2319 } else { /* IB_LINK_LAYER_INFINIBAND use the default counter */ 2320 counter_index = mlx4_get_default_counter_index(dev, 2321 i + 1); 2322 } 2323 ibdev->counters[i].index = counter_index; 2324 ibdev->counters[i].allocated = allocated; 2325 pr_info("counter index %d for port %d allocated %d\n", 2326 counter_index, i + 1, allocated); 2327 } 2328 if (mlx4_is_bonded(dev)) 2329 for (i = 1; i < ibdev->num_ports ; ++i) { 2330 ibdev->counters[i].index = ibdev->counters[0].index; 2331 ibdev->counters[i].allocated = 0; 2332 } 2333 2334 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) 2335 ib_num_ports++; 2336 2337 spin_lock_init(&ibdev->sm_lock); 2338 mutex_init(&ibdev->cap_mask_mutex); 2339 INIT_LIST_HEAD(&ibdev->qp_list); 2340 spin_lock_init(&ibdev->reset_flow_resource_lock); 2341 2342 if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED && 2343 ib_num_ports) { 2344 ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS; 2345 err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count, 2346 MLX4_IB_UC_STEER_QPN_ALIGN, 2347 &ibdev->steer_qpn_base, 0); 2348 if (err) 2349 goto err_counter; 2350 2351 ibdev->ib_uc_qpns_bitmap = 2352 kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) * 2353 sizeof(long), 2354 GFP_KERNEL); 2355 if (!ibdev->ib_uc_qpns_bitmap) { 2356 dev_err(&dev->persist->pdev->dev, 2357 "bit map alloc failed\n"); 2358 goto err_steer_qp_release; 2359 } 2360 2361 bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); 2362 2363 err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( 2364 dev, ibdev->steer_qpn_base, 2365 ibdev->steer_qpn_base + 2366 ibdev->steer_qpn_count - 1); 2367 if (err) 2368 goto err_steer_free_bitmap; 2369 } 2370 2371 for (j = 1; j <= ibdev->dev->caps.num_ports; j++) 2372 atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]); 2373 2374 if (ib_register_device(&ibdev->ib_dev, NULL)) 2375 goto err_steer_free_bitmap; 2376 2377 if (mlx4_ib_mad_init(ibdev)) 2378 goto err_reg; 2379 2380 if (mlx4_ib_init_sriov(ibdev)) 2381 goto err_mad; 2382 2383 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) { 2384 if (!iboe->nb.notifier_call) { 2385 iboe->nb.notifier_call = mlx4_ib_netdev_event; 2386 err = register_netdevice_notifier(&iboe->nb); 2387 if (err) { 2388 iboe->nb.notifier_call = NULL; 2389 goto err_notif; 2390 } 2391 } 2392 } 2393 2394 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { 2395 if (device_create_file(&ibdev->ib_dev.dev, 2396 mlx4_class_attributes[j])) 2397 goto err_notif; 2398 } 2399 2400 ibdev->ib_active = true; 2401 2402 if (mlx4_is_mfunc(ibdev->dev)) 2403 init_pkeys(ibdev); 2404 2405 /* create paravirt contexts for any VFs which are active */ 2406 if (mlx4_is_master(ibdev->dev)) { 2407 for (j = 0; j < MLX4_MFUNC_MAX; j++) { 2408 if (j == mlx4_master_func_num(ibdev->dev)) 2409 continue; 2410 if (mlx4_is_slave_active(ibdev->dev, j)) 2411 do_slave_init(ibdev, j, 1); 2412 } 2413 } 2414 return ibdev; 2415 2416 err_notif: 2417 if (ibdev->iboe.nb.notifier_call) { 2418 if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 2419 pr_warn("failure unregistering notifier\n"); 2420 ibdev->iboe.nb.notifier_call = NULL; 2421 } 
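/* Flush any port-event work already queued on the driver workqueue before continuing the teardown below. */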
2422 flush_workqueue(wq); 2423 2424 mlx4_ib_close_sriov(ibdev); 2425 2426 err_mad: 2427 mlx4_ib_mad_cleanup(ibdev); 2428 2429 err_reg: 2430 ib_unregister_device(&ibdev->ib_dev); 2431 2432 err_steer_free_bitmap: 2433 kfree(ibdev->ib_uc_qpns_bitmap); 2434 2435 err_steer_qp_release: 2436 if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) 2437 mlx4_qp_release_range(dev, ibdev->steer_qpn_base, 2438 ibdev->steer_qpn_count); 2439 err_counter: 2440 for (i = 0; i < ibdev->num_ports; ++i) { 2441 if (ibdev->counters[i].index != -1 && 2442 ibdev->counters[i].allocated) 2443 mlx4_counter_free(ibdev->dev, 2444 ibdev->counters[i].index); 2445 } 2446 err_map: 2447 iounmap(ibdev->uar_map); 2448 2449 err_uar: 2450 mlx4_uar_free(dev, &ibdev->priv_uar); 2451 2452 err_pd: 2453 mlx4_pd_free(dev, ibdev->priv_pdn); 2454 2455 err_dealloc: 2456 ib_dealloc_device(&ibdev->ib_dev); 2457 2458 return NULL; 2459 } 2460 2461 int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn) 2462 { 2463 int offset; 2464 2465 WARN_ON(!dev->ib_uc_qpns_bitmap); 2466 2467 offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap, 2468 dev->steer_qpn_count, 2469 get_count_order(count)); 2470 if (offset < 0) 2471 return offset; 2472 2473 *qpn = dev->steer_qpn_base + offset; 2474 return 0; 2475 } 2476 2477 void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count) 2478 { 2479 if (!qpn || 2480 dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED) 2481 return; 2482 2483 BUG_ON(qpn < dev->steer_qpn_base); 2484 2485 bitmap_release_region(dev->ib_uc_qpns_bitmap, 2486 qpn - dev->steer_qpn_base, 2487 get_count_order(count)); 2488 } 2489 2490 int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 2491 int is_attach) 2492 { 2493 int err; 2494 size_t flow_size; 2495 struct ib_flow_attr *flow = NULL; 2496 struct ib_flow_spec_ib *ib_spec; 2497 2498 if (is_attach) { 2499 flow_size = sizeof(struct ib_flow_attr) + 2500 sizeof(struct ib_flow_spec_ib); 2501 flow = kzalloc(flow_size, GFP_KERNEL); 2502 if (!flow) 2503 return -ENOMEM; 2504 flow->port = mqp->port; 2505 flow->num_of_specs = 1; 2506 flow->size = flow_size; 2507 ib_spec = (struct ib_flow_spec_ib *)(flow + 1); 2508 ib_spec->type = IB_FLOW_SPEC_IB; 2509 ib_spec->size = sizeof(struct ib_flow_spec_ib); 2510 /* Add an empty rule for IB L2 */ 2511 memset(&ib_spec->mask, 0, sizeof(ib_spec->mask)); 2512 2513 err = __mlx4_ib_create_flow(&mqp->ibqp, flow, 2514 IB_FLOW_DOMAIN_NIC, 2515 MLX4_FS_REGULAR, 2516 &mqp->reg_id); 2517 } else { 2518 err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); 2519 } 2520 kfree(flow); 2521 return err; 2522 } 2523 2524 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) 2525 { 2526 struct mlx4_ib_dev *ibdev = ibdev_ptr; 2527 int p; 2528 2529 ibdev->ib_active = false; 2530 flush_workqueue(wq); 2531 2532 mlx4_ib_close_sriov(ibdev); 2533 mlx4_ib_mad_cleanup(ibdev); 2534 ib_unregister_device(&ibdev->ib_dev); 2535 if (ibdev->iboe.nb.notifier_call) { 2536 if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 2537 pr_warn("failure unregistering notifier\n"); 2538 ibdev->iboe.nb.notifier_call = NULL; 2539 } 2540 2541 if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) { 2542 mlx4_qp_release_range(dev, ibdev->steer_qpn_base, 2543 ibdev->steer_qpn_count); 2544 kfree(ibdev->ib_uc_qpns_bitmap); 2545 } 2546 2547 iounmap(ibdev->uar_map); 2548 for (p = 0; p < ibdev->num_ports; ++p) 2549 if (ibdev->counters[p].index != -1 && 2550 ibdev->counters[p].allocated) 2551 mlx4_counter_free(ibdev->dev, 
ibdev->counters[p].index); 2552 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) 2553 mlx4_CLOSE_PORT(dev, p); 2554 2555 mlx4_ib_free_eqs(dev, ibdev); 2556 2557 mlx4_uar_free(dev, &ibdev->priv_uar); 2558 mlx4_pd_free(dev, ibdev->priv_pdn); 2559 ib_dealloc_device(&ibdev->ib_dev); 2560 } 2561 2562 static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) 2563 { 2564 struct mlx4_ib_demux_work **dm = NULL; 2565 struct mlx4_dev *dev = ibdev->dev; 2566 int i; 2567 unsigned long flags; 2568 struct mlx4_active_ports actv_ports; 2569 unsigned int ports; 2570 unsigned int first_port; 2571 2572 if (!mlx4_is_master(dev)) 2573 return; 2574 2575 actv_ports = mlx4_get_active_ports(dev, slave); 2576 ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); 2577 first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports); 2578 2579 dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC); 2580 if (!dm) { 2581 pr_err("failed to allocate memory for tunneling qp update\n"); 2582 return; 2583 } 2584 2585 for (i = 0; i < ports; i++) { 2586 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); 2587 if (!dm[i]) { 2588 pr_err("failed to allocate memory for tunneling qp update work struct\n"); 2589 while (--i >= 0) 2590 kfree(dm[i]); 2591 goto out; 2592 } 2593 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); 2594 dm[i]->port = first_port + i + 1; 2595 dm[i]->slave = slave; 2596 dm[i]->do_init = do_init; 2597 dm[i]->dev = ibdev; 2598 } 2599 /* initialize or tear down tunnel QPs for the slave */ 2600 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); 2601 if (!ibdev->sriov.is_going_down) { 2602 for (i = 0; i < ports; i++) 2603 queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); 2604 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); 2605 } else { 2606 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); 2607 for (i = 0; i < ports; i++) 2608 kfree(dm[i]); 2609 } 2610 out: 2611 kfree(dm); 2612 return; 2613 } 2614 2615 static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev) 2616 { 2617 struct mlx4_ib_qp *mqp; 2618 unsigned long flags_qp; 2619 unsigned long flags_cq; 2620 struct mlx4_ib_cq *send_mcq, *recv_mcq; 2621 struct list_head cq_notify_list; 2622 struct mlx4_cq *mcq; 2623 unsigned long flags; 2624 2625 pr_warn("mlx4_ib_handle_catas_error was started\n"); 2626 INIT_LIST_HEAD(&cq_notify_list); 2627 2628 /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ 2629 spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); 2630 2631 list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { 2632 spin_lock_irqsave(&mqp->sq.lock, flags_qp); 2633 if (mqp->sq.tail != mqp->sq.head) { 2634 send_mcq = to_mcq(mqp->ibqp.send_cq); 2635 spin_lock_irqsave(&send_mcq->lock, flags_cq); 2636 if (send_mcq->mcq.comp && 2637 mqp->ibqp.send_cq->comp_handler) { 2638 if (!send_mcq->mcq.reset_notify_added) { 2639 send_mcq->mcq.reset_notify_added = 1; 2640 list_add_tail(&send_mcq->mcq.reset_notify, 2641 &cq_notify_list); 2642 } 2643 } 2644 spin_unlock_irqrestore(&send_mcq->lock, flags_cq); 2645 } 2646 spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); 2647 /* Now, handle the QP's receive queue */ 2648 spin_lock_irqsave(&mqp->rq.lock, flags_qp); 2649 /* no handling is needed for SRQ */ 2650 if (!mqp->ibqp.srq) { 2651 if (mqp->rq.tail != mqp->rq.head) { 2652 recv_mcq = to_mcq(mqp->ibqp.recv_cq); 2653 spin_lock_irqsave(&recv_mcq->lock, flags_cq); 2654 if (recv_mcq->mcq.comp && 2655 mqp->ibqp.recv_cq->comp_handler) { 2656 if (!recv_mcq->mcq.reset_notify_added) { 2657 
recv_mcq->mcq.reset_notify_added = 1; 2658 list_add_tail(&recv_mcq->mcq.reset_notify, 2659 &cq_notify_list); 2660 } 2661 } 2662 spin_unlock_irqrestore(&recv_mcq->lock, 2663 flags_cq); 2664 } 2665 } 2666 spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); 2667 } 2668 2669 list_for_each_entry(mcq, &cq_notify_list, reset_notify) { 2670 mcq->comp(mcq); 2671 } 2672 spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); 2673 pr_warn("mlx4_ib_handle_catas_error ended\n"); 2674 } 2675 2676 static void handle_bonded_port_state_event(struct work_struct *work) 2677 { 2678 struct ib_event_work *ew = 2679 container_of(work, struct ib_event_work, work); 2680 struct mlx4_ib_dev *ibdev = ew->ib_dev; 2681 enum ib_port_state bonded_port_state = IB_PORT_NOP; 2682 int i; 2683 struct ib_event ibev; 2684 2685 kfree(ew); 2686 spin_lock_bh(&ibdev->iboe.lock); 2687 for (i = 0; i < MLX4_MAX_PORTS; ++i) { 2688 struct net_device *curr_netdev = ibdev->iboe.netdevs[i]; 2689 enum ib_port_state curr_port_state; 2690 2691 if (!curr_netdev) 2692 continue; 2693 2694 curr_port_state = 2695 (netif_running(curr_netdev) && 2696 netif_carrier_ok(curr_netdev)) ? 2697 IB_PORT_ACTIVE : IB_PORT_DOWN; 2698 2699 bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ? 2700 curr_port_state : IB_PORT_ACTIVE; 2701 } 2702 spin_unlock_bh(&ibdev->iboe.lock); 2703 2704 ibev.device = &ibdev->ib_dev; 2705 ibev.element.port_num = 1; 2706 ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ? 2707 IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; 2708 2709 ib_dispatch_event(&ibev); 2710 } 2711 2712 static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, 2713 enum mlx4_dev_event event, unsigned long param) 2714 { 2715 struct ib_event ibev; 2716 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); 2717 struct mlx4_eqe *eqe = NULL; 2718 struct ib_event_work *ew; 2719 int p = 0; 2720 2721 if (mlx4_is_bonded(dev) && 2722 ((event == MLX4_DEV_EVENT_PORT_UP) || 2723 (event == MLX4_DEV_EVENT_PORT_DOWN))) { 2724 ew = kmalloc(sizeof(*ew), GFP_ATOMIC); 2725 if (!ew) 2726 return; 2727 INIT_WORK(&ew->work, handle_bonded_port_state_event); 2728 ew->ib_dev = ibdev; 2729 queue_work(wq, &ew->work); 2730 return; 2731 } 2732 2733 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) 2734 eqe = (struct mlx4_eqe *)param; 2735 else 2736 p = (int) param; 2737 2738 switch (event) { 2739 case MLX4_DEV_EVENT_PORT_UP: 2740 if (p > ibdev->num_ports) 2741 return; 2742 if (mlx4_is_master(dev) && 2743 rdma_port_get_link_layer(&ibdev->ib_dev, p) == 2744 IB_LINK_LAYER_INFINIBAND) { 2745 mlx4_ib_invalidate_all_guid_record(ibdev, p); 2746 } 2747 ibev.event = IB_EVENT_PORT_ACTIVE; 2748 break; 2749 2750 case MLX4_DEV_EVENT_PORT_DOWN: 2751 if (p > ibdev->num_ports) 2752 return; 2753 ibev.event = IB_EVENT_PORT_ERR; 2754 break; 2755 2756 case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: 2757 ibdev->ib_active = false; 2758 ibev.event = IB_EVENT_DEVICE_FATAL; 2759 mlx4_ib_handle_catas_error(ibdev); 2760 break; 2761 2762 case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: 2763 ew = kmalloc(sizeof *ew, GFP_ATOMIC); 2764 if (!ew) { 2765 pr_err("failed to allocate memory for events work\n"); 2766 break; 2767 } 2768 2769 INIT_WORK(&ew->work, handle_port_mgmt_change_event); 2770 memcpy(&ew->ib_eqe, eqe, sizeof *eqe); 2771 ew->ib_dev = ibdev; 2772 /* need to queue only for port owner, which uses GEN_EQE */ 2773 if (mlx4_is_master(dev)) 2774 queue_work(wq, &ew->work); 2775 else 2776 handle_port_mgmt_change_event(&ew->work); 2777 return; 2778 2779 case MLX4_DEV_EVENT_SLAVE_INIT: 2780 /* here, p is the 
slave id */ 2781 do_slave_init(ibdev, p, 1); 2782 if (mlx4_is_master(dev)) { 2783 int i; 2784 2785 for (i = 1; i <= ibdev->num_ports; i++) { 2786 if (rdma_port_get_link_layer(&ibdev->ib_dev, i) 2787 == IB_LINK_LAYER_INFINIBAND) 2788 mlx4_ib_slave_alias_guid_event(ibdev, 2789 p, i, 2790 1); 2791 } 2792 } 2793 return; 2794 2795 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: 2796 if (mlx4_is_master(dev)) { 2797 int i; 2798 2799 for (i = 1; i <= ibdev->num_ports; i++) { 2800 if (rdma_port_get_link_layer(&ibdev->ib_dev, i) 2801 == IB_LINK_LAYER_INFINIBAND) 2802 mlx4_ib_slave_alias_guid_event(ibdev, 2803 p, i, 2804 0); 2805 } 2806 } 2807 /* here, p is the slave id */ 2808 do_slave_init(ibdev, p, 0); 2809 return; 2810 2811 default: 2812 return; 2813 } 2814 2815 ibev.device = ibdev_ptr; 2816 ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p; 2817 2818 ib_dispatch_event(&ibev); 2819 } 2820 2821 static struct mlx4_interface mlx4_ib_interface = { 2822 .add = mlx4_ib_add, 2823 .remove = mlx4_ib_remove, 2824 .event = mlx4_ib_event, 2825 .protocol = MLX4_PROT_IB_IPV6, 2826 .flags = MLX4_INTFF_BONDING 2827 }; 2828 2829 static int __init mlx4_ib_init(void) 2830 { 2831 int err; 2832 2833 wq = create_singlethread_workqueue("mlx4_ib"); 2834 if (!wq) 2835 return -ENOMEM; 2836 2837 err = mlx4_ib_mcg_init(); 2838 if (err) 2839 goto clean_wq; 2840 2841 err = mlx4_register_interface(&mlx4_ib_interface); 2842 if (err) 2843 goto clean_mcg; 2844 2845 return 0; 2846 2847 clean_mcg: 2848 mlx4_ib_mcg_destroy(); 2849 2850 clean_wq: 2851 destroy_workqueue(wq); 2852 return err; 2853 } 2854 2855 static void __exit mlx4_ib_cleanup(void) 2856 { 2857 mlx4_unregister_interface(&mlx4_ib_interface); 2858 mlx4_ib_mcg_destroy(); 2859 destroy_workqueue(wq); 2860 } 2861 2862 module_init(mlx4_ib_init); 2863 module_exit(mlx4_ib_cleanup); 2864
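/*
 * Illustrative sketch (not part of the driver): the read-only attributes
 * registered above through mlx4_class_attributes (hw_rev, fw_ver, hca_type
 * and board_id, backed by show_rev(), show_fw_ver(), show_hca() and
 * show_board()) appear as sysfs files under the IB device's class
 * directory.  A minimal user-space reader might look like the snippet
 * below; the device name "mlx4_0" is an assumption -- it is whatever name
 * ib_register_device() assigned from the "mlx4_%d" template.
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		static const char *attrs[] = {
 *			"hw_rev", "fw_ver", "hca_type", "board_id"
 *		};
 *		char path[128], buf[128];
 *		unsigned int i;
 *
 *		for (i = 0; i < sizeof(attrs) / sizeof(attrs[0]); i++) {
 *			FILE *f;
 *
 *			snprintf(path, sizeof(path),
 *				 "/sys/class/infiniband/mlx4_0/%s", attrs[i]);
 *			f = fopen(path, "r");
 *			if (!f)
 *				continue;
 *			if (fgets(buf, sizeof(buf), f))
 *				printf("%s: %s", attrs[i], buf);
 *			fclose(f);
 *		}
 *		return 0;
 *	}
 */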