/*
 * Copyright (c) 2012-2016 VMware, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of EITHER the GNU General Public License
 * version 2 as published by the Free Software Foundation or the BSD
 * 2-Clause License. This program is distributed in the hope that it
 * will be useful, but WITHOUT ANY WARRANTY; WITHOUT EVEN THE IMPLIED
 * WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU General Public License version 2 for more details at
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program available in the file COPYING in the main
 * directory of this source tree.
 *
 * The BSD 2-Clause License
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <asm/page.h>
#include <linux/io.h>
#include <linux/wait.h>
#include <rdma/ib_addr.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include "pvrdma.h"

/**
 * pvrdma_req_notify_cq - request notification for a completion queue
 * @ibcq: the completion queue
 * @notify_flags: notification flags
 *
 * @return: 0 for success, or a positive value if IB_CQ_REPORT_MISSED_EVENTS
 * is set and completions are outstanding.
 */
int pvrdma_req_notify_cq(struct ib_cq *ibcq,
			 enum ib_cq_notify_flags notify_flags)
{
	struct pvrdma_dev *dev = to_vdev(ibcq->device);
	struct pvrdma_cq *cq = to_vcq(ibcq);
	u32 val = cq->cq_handle;
	unsigned long flags;
	int has_data = 0;

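	/*
	 * The doorbell value packs the CQ handle together with the requested
	 * arm mode: notify only for solicited completions, or for any
	 * completion.
	 */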
	val |= (notify_flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		PVRDMA_UAR_CQ_ARM_SOL : PVRDMA_UAR_CQ_ARM;

	spin_lock_irqsave(&cq->cq_lock, flags);

	pvrdma_write_uar_cq(dev, val);

	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
		unsigned int head;

		has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
						    cq->ibcq.cqe, &head);
		if (unlikely(has_data == PVRDMA_INVALID_IDX))
			dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return has_data;
}

/**
 * pvrdma_create_cq - create completion queue
 * @ibcq: Allocated CQ
 * @attr: completion queue attributes
 * @attrs: uverbs attribute bundle
 *
 * @return: 0 on success, negative errno on failure.
 */
int pvrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		     struct uverbs_attr_bundle *attrs)
{
	struct ib_udata *udata = &attrs->driver_udata;
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	struct pvrdma_dev *dev = to_vdev(ibdev);
	struct pvrdma_cq *cq = to_vcq(ibcq);
	int ret;
	int npages;
	unsigned long flags;
	union pvrdma_cmd_req req;
	union pvrdma_cmd_resp rsp;
	struct pvrdma_cmd_create_cq *cmd = &req.create_cq;
	struct pvrdma_cmd_create_cq_resp *resp = &rsp.create_cq_resp;
	struct pvrdma_create_cq_resp cq_resp = {};
	struct pvrdma_create_cq ucmd;
	struct pvrdma_ucontext *context = rdma_udata_to_drv_context(
		udata, struct pvrdma_ucontext, ibucontext);

	BUILD_BUG_ON(sizeof(struct pvrdma_cqe) != 64);

	if (attr->flags)
		return -EOPNOTSUPP;

	entries = roundup_pow_of_two(entries);
	if (entries < 1 || entries > dev->dsr->caps.max_cqe)
		return -EINVAL;

	if (!atomic_add_unless(&dev->num_cqs, 1, dev->dsr->caps.max_cq))
		return -ENOMEM;

	cq->ibcq.cqe = entries;
	cq->is_kernel = !udata;

	if (!cq->is_kernel) {
		if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
			ret = -EFAULT;
			goto err_cq;
		}

		cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, ucmd.buf_size,
				       IB_ACCESS_LOCAL_WRITE);
		if (IS_ERR(cq->umem)) {
			ret = PTR_ERR(cq->umem);
			goto err_cq;
		}

		npages = ib_umem_num_dma_blocks(cq->umem, PAGE_SIZE);
	} else {
		/* One extra page for shared ring state */
		npages = 1 + (entries * sizeof(struct pvrdma_cqe) +
			      PAGE_SIZE - 1) / PAGE_SIZE;

		/* Skip header page. */
		cq->offset = PAGE_SIZE;
	}

	if (npages < 0 || npages > PVRDMA_PAGE_DIR_MAX_PAGES) {
		dev_warn(&dev->pdev->dev,
			 "overflow pages in completion queue\n");
		ret = -EINVAL;
		goto err_umem;
	}

	ret = pvrdma_page_dir_init(dev, &cq->pdir, npages, cq->is_kernel);
	if (ret) {
		dev_warn(&dev->pdev->dev,
			 "could not allocate page directory\n");
		goto err_umem;
	}

	/* Ring state is always the first page. Set in library for user cq. */
	if (cq->is_kernel)
		cq->ring_state = cq->pdir.pages[0];
	else
		pvrdma_page_dir_insert_umem(&cq->pdir, cq->umem, 0);

	refcount_set(&cq->refcnt, 1);
	init_completion(&cq->free);
	spin_lock_init(&cq->cq_lock);

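	/*
	 * Post a CREATE_CQ command to the device. The response supplies the
	 * actual CQE count and the CQ handle used for doorbell writes and
	 * for indexing dev->cq_tbl.
	 */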
	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_CREATE_CQ;
	cmd->nchunks = npages;
	cmd->ctx_handle = context ? context->ctx_handle : 0;
	cmd->cqe = entries;
	cmd->pdir_dma = cq->pdir.dir_dma;
	ret = pvrdma_cmd_post(dev, &req, &rsp, PVRDMA_CMD_CREATE_CQ_RESP);
	if (ret < 0) {
		dev_warn(&dev->pdev->dev,
			 "could not create completion queue, error: %d\n", ret);
		goto err_page_dir;
	}

	cq->ibcq.cqe = resp->cqe;
	cq->cq_handle = resp->cq_handle;
	cq_resp.cqn = resp->cq_handle;
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[cq->cq_handle % dev->dsr->caps.max_cq] = cq;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	if (!cq->is_kernel) {
		cq->uar = &context->uar;

		/* Copy udata back. */
		if (ib_copy_to_udata(udata, &cq_resp, sizeof(cq_resp))) {
			dev_warn(&dev->pdev->dev,
				 "failed to copy back udata\n");
			pvrdma_destroy_cq(&cq->ibcq, udata);
			return -EINVAL;
		}
	}

	return 0;

err_page_dir:
	pvrdma_page_dir_cleanup(dev, &cq->pdir);
err_umem:
	ib_umem_release(cq->umem);
err_cq:
	atomic_dec(&dev->num_cqs);
	return ret;
}

static void pvrdma_free_cq(struct pvrdma_dev *dev, struct pvrdma_cq *cq)
{
	/* Drop the creation reference and wait until all remaining
	 * references are gone before releasing the CQ memory.
	 */
	if (refcount_dec_and_test(&cq->refcnt))
		complete(&cq->free);
	wait_for_completion(&cq->free);

	ib_umem_release(cq->umem);

	pvrdma_page_dir_cleanup(dev, &cq->pdir);
}

/**
 * pvrdma_destroy_cq - destroy completion queue
 * @cq: the completion queue to destroy.
 * @udata: user data or NULL for a kernel-created CQ
 *
 * @return: 0 for success.
 */
int pvrdma_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
	struct pvrdma_cq *vcq = to_vcq(cq);
	union pvrdma_cmd_req req;
	struct pvrdma_cmd_destroy_cq *cmd = &req.destroy_cq;
	struct pvrdma_dev *dev = to_vdev(cq->device);
	unsigned long flags;
	int ret;

	memset(cmd, 0, sizeof(*cmd));
	cmd->hdr.cmd = PVRDMA_CMD_DESTROY_CQ;
	cmd->cq_handle = vcq->cq_handle;

	ret = pvrdma_cmd_post(dev, &req, NULL, 0);
	if (ret < 0)
		dev_warn(&dev->pdev->dev,
			 "could not destroy completion queue, error: %d\n",
			 ret);

	/* free cq's resources */
	spin_lock_irqsave(&dev->cq_tbl_lock, flags);
	dev->cq_tbl[vcq->cq_handle] = NULL;
	spin_unlock_irqrestore(&dev->cq_tbl_lock, flags);

	pvrdma_free_cq(dev, vcq);
	atomic_dec(&dev->num_cqs);
	return 0;
}

static inline struct pvrdma_cqe *get_cqe(struct pvrdma_cq *cq, int i)
{
	return (struct pvrdma_cqe *)pvrdma_page_dir_get_ptr(
					&cq->pdir,
					cq->offset +
					sizeof(struct pvrdma_cqe) * i);
}

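/*
 * _pvrdma_flush_cqe - remove completions belonging to a QP being destroyed
 *
 * Called with the CQ lock held. Only kernel-owned CQ rings are touched; for
 * user CQs the function returns immediately. The ring is walked backwards
 * from the producer index: CQEs that belong to @qp are dropped by advancing
 * the consumer index, while CQEs for other QPs are compacted toward the
 * producer end so they are not lost.
 */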
void _pvrdma_flush_cqe(struct pvrdma_qp *qp, struct pvrdma_cq *cq)
{
	unsigned int head;
	int has_data;

	if (!cq->is_kernel)
		return;

	/* Lock held */
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (unlikely(has_data > 0)) {
		int items;
		int curr;
		int tail = pvrdma_idx(&cq->ring_state->rx.prod_tail,
				      cq->ibcq.cqe);
		struct pvrdma_cqe *cqe;
		struct pvrdma_cqe *curr_cqe;

		items = (tail > head) ? (tail - head) :
			(cq->ibcq.cqe - head + tail);
		curr = --tail;
		while (items-- > 0) {
			if (curr < 0)
				curr = cq->ibcq.cqe - 1;
			if (tail < 0)
				tail = cq->ibcq.cqe - 1;
			curr_cqe = get_cqe(cq, curr);
			if ((curr_cqe->qp & 0xFFFF) != qp->qp_handle) {
				if (curr != tail) {
					cqe = get_cqe(cq, tail);
					*cqe = *curr_cqe;
				}
				tail--;
			} else {
				pvrdma_idx_ring_inc(
					&cq->ring_state->rx.cons_head,
					cq->ibcq.cqe);
			}
			curr--;
		}
	}
}

static int pvrdma_poll_one(struct pvrdma_cq *cq, struct pvrdma_qp **cur_qp,
			   struct ib_wc *wc)
{
	struct pvrdma_dev *dev = to_vdev(cq->ibcq.device);
	int has_data;
	unsigned int head;
	bool tried = false;
	struct pvrdma_cqe *cqe;

retry:
	has_data = pvrdma_idx_ring_has_data(&cq->ring_state->rx,
					    cq->ibcq.cqe, &head);
	if (has_data == 0) {
		if (tried)
			return -EAGAIN;

		/* No completions visible: request a device poll of the CQ
		 * and retry once.
		 */
		pvrdma_write_uar_cq(dev, cq->cq_handle | PVRDMA_UAR_CQ_POLL);

		tried = true;
		goto retry;
	} else if (has_data == PVRDMA_INVALID_IDX) {
		dev_err(&dev->pdev->dev, "CQ ring state invalid\n");
		return -EAGAIN;
	}

	cqe = get_cqe(cq, head);

	/* Ensure cqe is valid. */
	rmb();
	if (dev->qp_tbl[cqe->qp & 0xffff])
		*cur_qp = (struct pvrdma_qp *)dev->qp_tbl[cqe->qp & 0xffff];
	else
		return -EAGAIN;

	wc->opcode = pvrdma_wc_opcode_to_ib(cqe->opcode);
	wc->status = pvrdma_wc_status_to_ib(cqe->status);
	wc->wr_id = cqe->wr_id;
	wc->qp = &(*cur_qp)->ibqp;
	wc->byte_len = cqe->byte_len;
	wc->ex.imm_data = cqe->imm_data;
	wc->src_qp = cqe->src_qp;
	wc->wc_flags = pvrdma_wc_flags_to_ib(cqe->wc_flags);
	wc->pkey_index = cqe->pkey_index;
	wc->slid = cqe->slid;
	wc->sl = cqe->sl;
	wc->dlid_path_bits = cqe->dlid_path_bits;
	wc->port_num = cqe->port_num;
	wc->vendor_err = cqe->vendor_err;
	wc->network_hdr_type = pvrdma_network_type_to_ib(cqe->network_hdr_type);

	/* Update shared ring state */
	pvrdma_idx_ring_inc(&cq->ring_state->rx.cons_head, cq->ibcq.cqe);

	return 0;
}

/**
 * pvrdma_poll_cq - poll for work completion queue entries
 * @ibcq: completion queue
 * @num_entries: the maximum number of entries
 * @wc: pointer to work completion array
 *
 * @return: number of polled completion entries
 */
int pvrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct pvrdma_cq *cq = to_vcq(ibcq);
	struct pvrdma_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;

	if (num_entries < 1 || wc == NULL)
		return 0;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (npolled = 0; npolled < num_entries; ++npolled) {
		if (pvrdma_poll_one(cq, &cur_qp, wc + npolled))
			break;
	}

	spin_unlock_irqrestore(&cq->cq_lock, flags);

	/* Ensure we do not return errors from poll_cq */
	return npolled;
}