/*
 * Copyright (c) 2012-2016 VMware, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include <asm/page.h>
#include <linux/inet.h>
#include <linux/io.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/vmw_pvrdma-abi.h>
#include <rdma/uverbs_ioctl.h>

#include "pvrdma.h"

/**
 * pvrdma_query_device - query device
 * @ibdev: the device to query
 * @props: the device properties
 * @uhw: user data
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_device(struct ib_device *ibdev,
			struct ib_device_attr *props,
			struct ib_udata *uhw)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	props->fw_ver = dev->dsr->caps.fw_ver;
	props->sys_image_guid = dev->dsr->caps.sys_image_guid;
	props->max_mr_size = dev->dsr->caps.max_mr_size;
	props->page_size_cap = dev->dsr->caps.page_size_cap;
	props->vendor_id = dev->dsr->caps.vendor_id;
	props->vendor_part_id = dev->pdev->device;
	props->hw_ver = dev->dsr->caps.hw_ver;
	props->max_qp = dev->dsr->caps.max_qp;
	props->max_qp_wr = dev->dsr->caps.max_qp_wr;
	props->device_cap_flags = dev->dsr->caps.device_cap_flags;
	props->max_send_sge = dev->dsr->caps.max_sge;
	props->max_recv_sge = dev->dsr->caps.max_sge;
	props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
					   dev->dsr->caps.max_sge_rd);
	props->max_srq = dev->dsr->caps.max_srq;
	props->max_srq_wr = dev->dsr->caps.max_srq_wr;
	props->max_srq_sge = dev->dsr->caps.max_srq_sge;
	props->max_cq = dev->dsr->caps.max_cq;
	props->max_cqe = dev->dsr->caps.max_cqe;
	props->max_mr = dev->dsr->caps.max_mr;
	props->max_pd = dev->dsr->caps.max_pd;
	props->max_qp_rd_atom = dev->dsr->caps.max_qp_rd_atom;
	props->max_qp_init_rd_atom = dev->dsr->caps.max_qp_init_rd_atom;
	props->atomic_cap =
		dev->dsr->caps.atomic_ops &
		(PVRDMA_ATOMIC_OP_COMP_SWAP | PVRDMA_ATOMIC_OP_FETCH_ADD) ?
		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
	props->masked_atomic_cap = props->atomic_cap;
	props->max_ah = dev->dsr->caps.max_ah;
	props->max_pkeys = dev->dsr->caps.max_pkeys;
	props->local_ca_ack_delay = dev->dsr->caps.local_ca_ack_delay;
	if ((dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_LOCAL_INV) &&
	    (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_REMOTE_INV) &&
	    (dev->dsr->caps.bmme_flags & PVRDMA_BMME_FLAG_FAST_REG_WR)) {
		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
		props->max_fast_reg_page_list_len = PVRDMA_GET_CAP(dev,
				PVRDMA_MAX_FAST_REG_PAGES,
				dev->dsr->caps.max_fast_reg_page_list_len);
	}

	props->device_cap_flags |= IB_DEVICE_PORT_ACTIVE_EVENT |
				   IB_DEVICE_RC_RNR_NAK_GEN;

	return 0;
}
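/*
 * Illustrative only (not part of this driver): the IB core caches the
 * attributes filled in above in ib_device.attrs, so a kernel ULP
 * normally reads them from there instead of re-issuing the query verb.
 * A minimal sketch, assuming a hypothetical ULP helper:
 *
 *	static bool ulp_has_fast_reg(struct ib_device *ibdev)
 *	{
 *		return !!(ibdev->attrs.device_cap_flags &
 *			  IB_DEVICE_MEM_MGT_EXTENSIONS);
 *	}
 */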
/**
 * pvrdma_query_port - query device port attributes
 * @ibdev: the device to query
 * @port: the port number
 * @props: the device properties
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_port(struct ib_device *ibdev, u32 port,
		      struct ib_port_attr *props)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_port *cmd = &req.query_port;
	struct pvrdma_cmd_query_port_resp *resp = &rsp.query_port_resp;
	int err;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_PORT;
	cmd->port_num = port;

	err = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_QUERY_PORT_RESP);
	if (err < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not query port, error: %d\n", err);
		return err;
	}

	/* props is zeroed by the caller; avoid zeroing it again here */

	props->state = pvrdma_port_state_to_ib(resp->attrs.state);
	props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
	props->active_mtu = pvrdma_mtu_to_ib(resp->attrs.active_mtu);
	props->gid_tbl_len = resp->attrs.gid_tbl_len;
	props->port_cap_flags =
		pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
	props->port_cap_flags |= IB_PORT_CM_SUP;
	props->ip_gids = true;
	props->max_msg_sz = resp->attrs.max_msg_sz;
	props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
	props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
	props->pkey_tbl_len = resp->attrs.pkey_tbl_len;
	props->lid = resp->attrs.lid;
	props->sm_lid = resp->attrs.sm_lid;
	props->lmc = resp->attrs.lmc;
	props->max_vl_num = resp->attrs.max_vl_num;
	props->sm_sl = resp->attrs.sm_sl;
	props->subnet_timeout = resp->attrs.subnet_timeout;
	props->init_type_reply = resp->attrs.init_type_reply;
	props->active_width = pvrdma_port_width_to_ib(resp->attrs.active_width);
	props->active_speed = pvrdma_port_speed_to_ib(resp->attrs.active_speed);
	props->phys_state = resp->attrs.phys_state;

	return 0;
}

/**
 * pvrdma_query_gid - query device gid
 * @ibdev: the device to query
 * @port: the port number
 * @index: the GID table index
 * @gid: the device gid value
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_gid(struct ib_device *ibdev, u32 port, int index,
		     union ib_gid *gid)
{
	struct pvrdma_dev *dev = to_vdev(ibdev);

	if (index >= dev->dsr->caps.gid_tbl_len)
		return -EINVAL;

	memcpy(gid, &dev->sgid_tbl[index], sizeof(union ib_gid));

	return 0;
}
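/*
 * Illustrative only: in-kernel consumers usually go through the core
 * GID cache, e.g. rdma_query_gid(), rather than calling the verb above
 * directly. A minimal sketch (the port and index values are
 * placeholders):
 *
 *	union ib_gid gid;
 *
 *	if (!rdma_query_gid(ibdev, 1, 0, &gid))
 *		pr_info("port 1 GID[0]: %pI6\n", gid.raw);
 */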
/**
 * pvrdma_query_pkey - query device port's P_Key table
 * @ibdev: the device to query
 * @port: the port number
 * @index: the P_Key table index
 * @pkey: the device P_Key value
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
		      u16 *pkey)
{
	int err = 0;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_query_pkey *cmd = &req.query_pkey;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_QUERY_PKEY;
	cmd->port_num = port;
	cmd->index = index;

	err = pvrdma_cmd_post(to_vdev(ibdev), &req, &rsp,
			      PVRDMA_CMD_QUERY_PKEY_RESP);
	if (err < 0) {
		dev_warn(&to_vdev(ibdev)->pdev->dev,
			 "could not query pkey, error: %d\n", err);
		return err;
	}

	*pkey = rsp.query_pkey_resp.pkey;

	return 0;
}

/**
 * pvrdma_port_link_layer - query the link layer of a port
 * @ibdev: the device to query
 * @port: the port number
 *
 * @return: Always IB_LINK_LAYER_ETHERNET (the device is RoCE-only)
 */
enum rdma_link_layer pvrdma_port_link_layer(struct ib_device *ibdev,
					    u32 port)
{
	return IB_LINK_LAYER_ETHERNET;
}

/**
 * pvrdma_modify_port - modify device port attributes
 * @ibdev: the device to modify
 * @port: the port number
 * @mask: attributes to modify
 * @props: the device properties
 *
 * @return: 0 on success, otherwise negative errno
 */
int pvrdma_modify_port(struct ib_device *ibdev, u32 port, int mask,
		       struct ib_port_modify *props)
{
	struct ib_port_attr attr;
	struct pvrdma_dev *vdev = to_vdev(ibdev);
	int ret;

	if (mask & ~IB_PORT_SHUTDOWN) {
		dev_warn(&vdev->pdev->dev,
			 "unsupported port modify mask %#x\n", mask);
		return -EOPNOTSUPP;
	}

	mutex_lock(&vdev->port_mutex);
	ret = ib_query_port(ibdev, port, &attr);
	if (ret)
		goto out;

	vdev->port_cap_mask |= props->set_port_cap_mask;
	vdev->port_cap_mask &= ~props->clr_port_cap_mask;

	if (mask & IB_PORT_SHUTDOWN)
		vdev->ib_active = false;

out:
	mutex_unlock(&vdev->port_mutex);
	return ret;
}
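/*
 * Illustrative only: IB_PORT_SHUTDOWN is the single mask bit that
 * pvrdma_modify_port() accepts, so a caller taking the port down would
 * look roughly like this (any other mask bit yields -EOPNOTSUPP):
 *
 *	struct ib_port_modify pm = {};
 *
 *	err = ib_modify_port(ibdev, 1, IB_PORT_SHUTDOWN, &pm);
 */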
/**
 * pvrdma_alloc_ucontext - allocate ucontext
 * @uctx: the uverbs ucontext
 * @udata: user data
 *
 * @return: zero on success, otherwise errno.
 */
int pvrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
{
	struct ib_device *ibdev = uctx->device;
	struct pvrdma_dev *vdev = to_vdev(ibdev);
	struct pvrdma_ucontext *context = to_vucontext(uctx);
	union pvrdma_cmd_req req = {};
	union pvrdma_cmd_resp rsp = {};
	struct pvrdma_cmd_create_uc *cmd = &req.create_uc;
	struct pvrdma_cmd_create_uc_resp *resp = &rsp.create_uc_resp;
	struct pvrdma_alloc_ucontext_resp uresp = {};
	int ret;

	if (!vdev->ib_active)
		return -EAGAIN;

	context->dev = vdev;
	ret = pvrdma_uar_alloc(vdev, &context->uar);
	if (ret)
		return -ENOMEM;

	/* get ctx_handle from host */
	if (vdev->dsr_version < PVRDMA_PPN64_VERSION)
		cmd->pfn = context->uar.pfn;
	else
		cmd->pfn64 = context->uar.pfn;

	cmd->hdr.cmd = PVRDMA_CMD_CREATE_UC;
	ret = pvrdma_cmd_post(vdev, &req, &rsp, PVRDMA_CMD_CREATE_UC_RESP);
	if (ret < 0) {
		dev_warn(&vdev->pdev->dev,
			 "could not create ucontext, error: %d\n", ret);
		goto err;
	}

	context->ctx_handle = resp->ctx_handle;

	/* copy back to user */
	uresp.qp_tab_size = vdev->dsr->caps.max_qp;
	ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (ret) {
		pvrdma_uar_free(vdev, &context->uar);
		pvrdma_dealloc_ucontext(&context->ibucontext);
		return -EFAULT;
	}

	return 0;

err:
	pvrdma_uar_free(vdev, &context->uar);
	return ret;
}

/**
 * pvrdma_dealloc_ucontext - deallocate ucontext
 * @ibcontext: the ucontext
 */
void pvrdma_dealloc_ucontext(struct ib_ucontext *ibcontext)
{
	struct pvrdma_ucontext *context = to_vucontext(ibcontext);
	union pvrdma_cmd_req req = {};
	struct pvrdma_cmd_destroy_uc *cmd = &req.destroy_uc;
	int ret;

	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_UC;
	cmd->ctx_handle = context->ctx_handle;

	ret = pvrdma_cmd_post(context->dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&context->dev->pdev->dev,
			 "destroy ucontext failed, error: %d\n", ret);

	/* Free the UAR even if the device command failed */
	pvrdma_uar_free(to_vdev(ibcontext->device), &context->uar);
}

/**
 * pvrdma_mmap - create mmap region
 * @ibcontext: the user context
 * @vma: the VMA
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
{
	struct pvrdma_ucontext *context = to_vucontext(ibcontext);
	unsigned long start = vma->vm_start;
	unsigned long size = vma->vm_end - vma->vm_start;
	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;

	dev_dbg(&context->dev->pdev->dev, "create mmap region\n");

	if ((size != PAGE_SIZE) || (offset & ~PAGE_MASK)) {
		dev_warn(&context->dev->pdev->dev,
			 "invalid params for mmap region\n");
		return -EINVAL;
	}

	/* Map UAR to kernel space, VM_LOCKED? */
	vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND);
	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	if (io_remap_pfn_range(vma, start, context->uar.pfn, size,
			       vma->vm_page_prot))
		return -EAGAIN;

	return 0;
}
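/*
 * Illustrative sketch of the user-space side (an assumption about the
 * provider library, not code in this file): the verbs provider maps a
 * single page of the uverbs fd, which arrives in pvrdma_mmap() above
 * and is remapped to the per-context UAR page.
 *
 *	void *uar = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED,
 *			 cmd_fd, 0);
 *	if (uar == MAP_FAILED)
 *		return errno;
 */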
/**
 * pvrdma_alloc_pd - allocate protection domain
 * @ibpd: PD pointer
 * @udata: user data
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
{
	struct ib_device *ibdev = ibpd->device;
	struct pvrdma_pd *pd = to_vpd(ibpd);
	struct pvrdma_dev *dev = to_vdev(ibdev);
	union pvrdma_cmd_req req = {};
	union pvrdma_cmd_resp rsp = {};
	struct pvrdma_cmd_create_pd *cmd = &req.create_pd;
	struct pvrdma_cmd_create_pd_resp *resp = &rsp.create_pd_resp;
	struct pvrdma_alloc_pd_resp pd_resp = {0};
	int ret;
	struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
		udata, struct pvrdma_ucontext, ibucontext);

	/* Check allowed max pds */
	if (!atomic_add_unless(&dev->num_pds, 1, dev->dsr->caps.max_pd))
		return -ENOMEM;

	cmd->hdr.cmd = PVRDMA_CMD_CREATE_PD;
	cmd->ctx_handle = context ? context->ctx_handle : 0;
	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_PD_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "failed to allocate protection domain, error: %d\n",
			 ret);
		goto err;
	}

	pd->privileged = !udata;
	pd->pd_handle = resp->pd_handle;
	pd->pdn = resp->pd_handle;
	pd_resp.pdn = resp->pd_handle;

	if (udata) {
		if (ib_copy_to_udata(udata, &pd_resp, sizeof(pd_resp))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back protection domain\n");
			pvrdma_dealloc_pd(&pd->ibpd, udata);
			return -EFAULT;
		}
	}

	/* u32 pd handle */
	return 0;

err:
	atomic_dec(&dev->num_pds);
	return ret;
}

/**
 * pvrdma_dealloc_pd - deallocate protection domain
 * @pd: the protection domain to be released
 * @udata: user data or null for kernel object
 *
 * @return: Always 0
 */
int pvrdma_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
{
	struct pvrdma_dev *dev = to_vdev(pd->device);
	union pvrdma_cmd_req req = {};
	struct pvrdma_cmd_destroy_pd *cmd = &req.destroy_pd;
	int ret;

	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_PD;
	cmd->pd_handle = to_vpd(pd)->pd_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret)
		dev_warn(&dev->pdev->dev,
			 "could not dealloc protection domain, error: %d\n",
			 ret);

	atomic_dec(&dev->num_pds);
	return 0;
}
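/*
 * Illustrative only: a kernel ULP allocates and releases PDs through
 * the core verbs, which land in the two functions above; at most
 * caps.max_pd allocations can be outstanding at once.
 *
 *	struct ib_pd *pd = ib_alloc_pd(ibdev, 0);
 *
 *	if (IS_ERR(pd))
 *		return PTR_ERR(pd);
 *	// ... create QPs/CQs/MRs against pd ...
 *	ib_dealloc_pd(pd);
 */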
/**
 * pvrdma_create_ah - create an address handle
 * @ibah: the IB address handle
 * @init_attr: the attributes of the AH
 * @udata: pointer to user data
 *
 * @return: 0 on success, otherwise errno.
 */
int pvrdma_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
		     struct ib_udata *udata)
{
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
	struct pvrdma_dev *dev = to_vdev(ibah->device);
	struct pvrdma_ah *ah = to_vah(ibah);
	const struct ib_global_route *grh;
	u32 port_num = rdma_ah_get_port_num(ah_attr);

	if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH))
		return -EINVAL;

	grh = rdma_ah_read_grh(ah_attr);
	if ((ah_attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
	    rdma_is_multicast_addr((struct in6_addr *)grh->dgid.raw))
		return -EINVAL;

	if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah))
		return -ENOMEM;

	ah->av.port_pd = to_vpd(ibah->pd)->pd_handle | (port_num << 24);
	ah->av.src_path_bits = rdma_ah_get_path_bits(ah_attr);
	ah->av.src_path_bits |= 0x80;
	ah->av.gid_index = grh->sgid_index;
	ah->av.hop_limit = grh->hop_limit;
	ah->av.sl_tclass_flowlabel = (grh->traffic_class << 20) |
				     grh->flow_label;
	memcpy(ah->av.dgid, grh->dgid.raw, 16);
	memcpy(ah->av.dmac, ah_attr->roce.dmac, ETH_ALEN);

	return 0;
}

/**
 * pvrdma_destroy_ah - destroy an address handle
 * @ah: the address handle to be destroyed
 * @flags: destroy address handle flags (see enum rdma_destroy_ah_flags)
 *
 * @return: Always 0
 */
int pvrdma_destroy_ah(struct ib_ah *ah, u32 flags)
{
	struct pvrdma_dev *dev = to_vdev(ah->device);

	atomic_dec(&dev->num_ahs);
	return 0;
}
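/*
 * Illustrative only: a RoCE consumer must supply a GRH (enforced by the
 * IB_AH_GRH check in pvrdma_create_ah() above). A minimal sketch, where
 * dgid and sgid_index are placeholders the caller would resolve first:
 *
 *	struct rdma_ah_attr attr = {};
 *	struct ib_ah *ah;
 *
 *	attr.type = RDMA_AH_ATTR_TYPE_ROCE;
 *	rdma_ah_set_port_num(&attr, 1);
 *	rdma_ah_set_grh(&attr, &dgid, 0, sgid_index, 64, 0);
 *	ah = rdma_create_ah(pd, &attr, RDMA_CREATE_AH_SLEEPABLE);
 *	if (IS_ERR(ah))
 *		return PTR_ERR(ah);
 */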