/*-
 * Copyright (c) 2013-2020, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <linux/module.h>
#include <dev/mlx5/qp.h>
#include <dev/mlx5/srq.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/uverbs_ioctl.h>

#include <dev/mlx5/mlx5_ib/mlx5_ib.h>

/* Return a pointer to WQE number "n" inside the SRQ buffer. */
static void *get_wqe(struct mlx5_ib_srq *srq, int n)
{
	return mlx5_buf_offset(&srq->buf, n << srq->msrq.wqe_shift);
}

/*
 * Dispatch a firmware SRQ event (limit reached or catastrophic error) to the
 * consumer's event handler, if one is registered.
 */
static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, int type)
{
	struct ib_event event;
	struct ib_srq *ibsrq = &to_mibsrq(srq)->ibsrq;

	if (ibsrq->event_handler) {
		event.device      = ibsrq->device;
		event.element.srq = ibsrq;
		switch (type) {
		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
			event.event = IB_EVENT_SRQ_LIMIT_REACHED;
			break;
		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
			event.event = IB_EVENT_SRQ_ERR;
			break;
		default:
			pr_warn("mlx5_ib: Unexpected event type %d on SRQ %06x\n",
				type, srq->srqn);
			return;
		}

		ibsrq->event_handler(&event, ibsrq->srq_context);
	}
}

/*
 * Create the SRQ work queue from a user-space buffer: copy in and validate the
 * create command, pin the user memory, build the physical address list and
 * map the user doorbell record.
 */
static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
			   struct mlx5_srq_attr *in,
			   struct ib_udata *udata, int buf_size)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_create_srq ucmd = {};
	struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);
	size_t ucmdlen;
	int err;
	int npages;
	int page_shift;
	int ncont;
	u32 offset;
	u32 uidx = MLX5_IB_DEFAULT_UIDX;

	ucmdlen = min(udata->inlen, sizeof(ucmd));

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
		mlx5_ib_dbg(dev, "failed copy udata\n");
		return -EFAULT;
	}

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	if (udata->inlen > sizeof(ucmd) &&
	    !ib_is_udata_cleared(udata, sizeof(ucmd),
				 udata->inlen - sizeof(ucmd)))
		return -EINVAL;

	if (in->type != IB_SRQT_BASIC) {
		err = get_srq_user_index(ucontext, &ucmd, udata->inlen, &uidx);
		if (err)
			return err;
	}

	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);

	srq->umem = ib_umem_get(&ucontext->ibucontext, ucmd.buf_addr,
				buf_size, 0, 0);
	if (IS_ERR(srq->umem)) {
		mlx5_ib_dbg(dev, "failed umem get, size %d\n", buf_size);
		err = PTR_ERR(srq->umem);
		return err;
	}

	mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages,
			   &page_shift, &ncont, NULL);
	err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift,
				     &offset);
	if (err) {
		mlx5_ib_warn(dev, "bad offset\n");
		goto err_umem;
	}

	in->pas = mlx5_vzalloc(sizeof(*in->pas) * ncont);
	if (!in->pas) {
		err = -ENOMEM;
		goto err_umem;
	}

	mlx5_ib_populate_pas(dev, srq->umem, page_shift, in->pas, 0);

	err = mlx5_ib_db_map_user(ucontext, ucmd.db_addr, &srq->db);
	if (err) {
		mlx5_ib_dbg(dev, "map doorbell failed\n");
		goto err_in;
	}

	in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	in->page_offset = offset;
	in->uid = (in->type != IB_SRQT_XRC) ? to_mpd(pd)->uid : 0;
	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
	    in->type != IB_SRQT_BASIC)
		in->user_index = uidx;

	return 0;

err_in:
	kvfree(in->pas);

err_umem:
	ib_umem_release(srq->umem);

	return err;
}

/*
 * Create the SRQ work queue in kernel memory: allocate the doorbell record and
 * the WQE buffer, and link every WQE into the free list.
 */
static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
			     struct mlx5_srq_attr *in, int buf_size)
{
	int err;
	int i;
	struct mlx5_wqe_srq_next_seg *next;

	err = mlx5_db_alloc(dev->mdev, &srq->db);
	if (err) {
		mlx5_ib_warn(dev, "alloc dbell rec failed\n");
		return err;
	}

	if (mlx5_buf_alloc(dev->mdev, buf_size, 2 * PAGE_SIZE, &srq->buf)) {
		mlx5_ib_dbg(dev, "buf alloc failed\n");
		err = -ENOMEM;
		goto err_db;
	}

	srq->head    = 0;
	srq->tail    = srq->msrq.max - 1;
	srq->wqe_ctr = 0;

	for (i = 0; i < srq->msrq.max; i++) {
		next = get_wqe(srq, i);
		next->next_wqe_index =
			cpu_to_be16((i + 1) & (srq->msrq.max - 1));
	}

	mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
	in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
	if (!in->pas) {
		err = -ENOMEM;
		goto err_buf;
	}
	mlx5_fill_page_array(&srq->buf, in->pas);

	srq->wrid = kmalloc(srq->msrq.max * sizeof(u64), GFP_KERNEL);
	if (!srq->wrid) {
		err = -ENOMEM;
		goto err_in;
	}
	srq->wq_sig = 0;

	in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
	    in->type != IB_SRQT_BASIC)
		in->user_index = MLX5_IB_DEFAULT_UIDX;

	return 0;

err_in:
	kvfree(in->pas);

err_buf:
	mlx5_buf_free(dev->mdev, &srq->buf);

err_db:
	mlx5_db_free(dev->mdev, &srq->db);
	return err;
}

static void destroy_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
			     struct ib_udata *udata)
{
	mlx5_ib_db_unmap_user(
		rdma_udata_to_drv_context(
			udata,
			struct mlx5_ib_ucontext,
			ibucontext),
		&srq->db);
	ib_umem_release(srq->umem);
}

static void destroy_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq)
{
	kfree(srq->wrid);
	mlx5_buf_free(dev->mdev, &srq->buf);
	mlx5_db_free(dev->mdev, &srq->db);
}

/*
 * Verbs entry point: validate the requested size, compute the WQE layout,
 * build the work queue (user or kernel) and create the SRQ in firmware.
 */
int mlx5_ib_create_srq(struct ib_srq *ib_srq,
		       struct ib_srq_init_attr *init_attr,
		       struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_srq->device);
	struct mlx5_ib_srq *srq = to_msrq(ib_srq);
	size_t desc_size;
	size_t buf_size;
	int err;
	struct mlx5_srq_attr in = {};
	__u32 max_srq_wqes = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);

	/* Sanity check SRQ size before proceeding */
	if (init_attr->attr.max_wr >= max_srq_wqes) {
		mlx5_ib_dbg(dev, "max_wr %d, cap %d\n",
			    init_attr->attr.max_wr,
			    max_srq_wqes);
		return -EINVAL;
	}

	mutex_init(&srq->mutex);
	spin_lock_init(&srq->lock);
	srq->msrq.max    = roundup_pow_of_two(init_attr->attr.max_wr + 1);
	srq->msrq.max_gs = init_attr->attr.max_sge;

	desc_size = sizeof(struct mlx5_wqe_srq_next_seg) +
		    srq->msrq.max_gs * sizeof(struct mlx5_wqe_data_seg);
	if (desc_size == 0 || srq->msrq.max_gs > desc_size)
		return -EINVAL;

	desc_size = roundup_pow_of_two(desc_size);
	desc_size = max_t(size_t, 32, desc_size);
	if (desc_size < sizeof(struct mlx5_wqe_srq_next_seg))
		return -EINVAL;

	srq->msrq.max_avail_gather = (desc_size - sizeof(struct mlx5_wqe_srq_next_seg)) /
				     sizeof(struct mlx5_wqe_data_seg);
	srq->msrq.wqe_shift = ilog2(desc_size);
	buf_size = srq->msrq.max * desc_size;
	if (buf_size < desc_size)
		return -EINVAL;

	in.type = init_attr->srq_type;

	if (udata)
		err = create_srq_user(ib_srq->pd, srq, &in, udata, buf_size);
	else
		err = create_srq_kernel(dev, srq, &in, buf_size);

	if (err) {
		mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
			     udata ? "user" : "kernel", err);
		return err;
	}

	in.log_size = ilog2(srq->msrq.max);
	in.wqe_shift = srq->msrq.wqe_shift - 4;
	if (srq->wq_sig)
		in.flags |= MLX5_SRQ_FLAG_WQ_SIG;

	if (init_attr->srq_type == IB_SRQT_XRC)
		in.xrcd = to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn;
	else
		in.xrcd = to_mxrcd(dev->devr.x0)->xrcdn;

	if (ib_srq_has_cq(init_attr->srq_type))
		in.cqn = to_mcq(init_attr->ext.cq)->mcq.cqn;
	else
		in.cqn = to_mcq(dev->devr.c0)->mcq.cqn;

	in.pd = to_mpd(ib_srq->pd)->pdn;
	in.db_record = srq->db.dma;
	err = mlx5_core_create_srq(dev->mdev, &srq->msrq, &in);
	kvfree(in.pas);
	if (err) {
		mlx5_ib_dbg(dev, "create SRQ failed, err %d\n", err);
		goto err_usr_kern_srq;
	}

	mlx5_ib_dbg(dev, "create SRQ with srqn 0x%x\n", srq->msrq.srqn);

	srq->msrq.event = mlx5_ib_srq_event;
	srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;

	if (udata)
		if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof(__u32))) {
			mlx5_ib_dbg(dev, "copy to user failed\n");
			err = -EFAULT;
			goto err_core;
		}

	init_attr->attr.max_wr = srq->msrq.max - 1;

	return 0;

err_core:
	mlx5_core_destroy_srq(dev->mdev, &srq->msrq);

err_usr_kern_srq:
	if (udata)
		destroy_srq_user(ib_srq->pd, srq, udata);
	else
		destroy_srq_kernel(dev, srq);

	return err;
}

int mlx5_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
		       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
	int ret;

	/* We don't support resizing SRQs yet */
	if (attr_mask & IB_SRQ_MAX_WR)
		return -EINVAL;

	if (attr_mask & IB_SRQ_LIMIT) {
		if (attr->srq_limit >= srq->msrq.max)
			return -EINVAL;

		mutex_lock(&srq->mutex);
		ret = mlx5_core_arm_srq(dev->mdev, &srq->msrq, attr->srq_limit, 1);
		mutex_unlock(&srq->mutex);

		if (ret)
			return ret;
	}

	return 0;
}

int mlx5_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
	int ret;
	struct mlx5_srq_attr *out;

	out = kzalloc(sizeof(*out), GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	ret = mlx5_core_query_srq(dev->mdev, &srq->msrq, out);
	if (ret)
		goto out_box;

	srq_attr->srq_limit = out->lwm;
	srq_attr->max_wr    = srq->msrq.max - 1;
	srq_attr->max_sge   = srq->msrq.max_gs;

out_box:
	kfree(out);
	return ret;
}

void mlx5_ib_destroy_srq(struct ib_srq *srq, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(srq->device);
	struct mlx5_ib_srq *msrq = to_msrq(srq);

	mlx5_core_destroy_srq(dev->mdev, &msrq->msrq);

	if (srq->uobject) {
		mlx5_ib_db_unmap_user(
			rdma_udata_to_drv_context(
				udata,
				struct mlx5_ib_ucontext,
				ibucontext),
			&msrq->db);
		ib_umem_release(msrq->umem);
	} else {
		destroy_srq_kernel(dev, msrq);
	}
}

void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index)
{
	struct mlx5_wqe_srq_next_seg *next;

	/* always called with interrupts disabled. */
	spin_lock(&srq->lock);

	next = get_wqe(srq, srq->tail);
	next->next_wqe_index = cpu_to_be16(wqe_index);
	srq->tail = wqe_index;

	spin_unlock(&srq->lock);
}

/*
 * Post a chain of receive work requests to the SRQ: fill in the scatter list
 * for each free WQE and update the doorbell record once for the whole chain.
 */
int mlx5_ib_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
			  const struct ib_recv_wr **bad_wr)
{
	struct mlx5_ib_srq *srq = to_msrq(ibsrq);
	struct mlx5_wqe_srq_next_seg *next;
	struct mlx5_wqe_data_seg *scat;
	struct mlx5_ib_dev *dev = to_mdev(ibsrq->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	unsigned long flags;
	int err = 0;
	int nreq;
	int i;

	spin_lock_irqsave(&srq->lock, flags);

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		err = -EIO;
		*bad_wr = wr;
		goto out;
	}

	for (nreq = 0; wr; nreq++, wr = wr->next) {
		if (unlikely(wr->num_sge > srq->msrq.max_gs)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely(srq->head == srq->tail)) {
			err = -ENOMEM;
			*bad_wr = wr;
			break;
		}

		srq->wrid[srq->head] = wr->wr_id;

		next      = get_wqe(srq, srq->head);
		srq->head = be16_to_cpu(next->next_wqe_index);
		scat      = (struct mlx5_wqe_data_seg *)(next + 1);

		for (i = 0; i < wr->num_sge; i++) {
			scat[i].byte_count = cpu_to_be32(wr->sg_list[i].length);
			scat[i].lkey       = cpu_to_be32(wr->sg_list[i].lkey);
			scat[i].addr       = cpu_to_be64(wr->sg_list[i].addr);
		}

		if (i < srq->msrq.max_avail_gather) {
			scat[i].byte_count = 0;
			scat[i].lkey       = cpu_to_be32(MLX5_INVALID_LKEY);
			scat[i].addr       = 0;
		}
	}

	if (likely(nreq)) {
		srq->wqe_ctr += nreq;

		/* Make sure that descriptors are written before
		 * doorbell record.
		 */
		wmb();

		*srq->db.db = cpu_to_be32(srq->wqe_ctr);
	}
out:
	spin_unlock_irqrestore(&srq->lock, flags);

	return err;
}