1 /* 2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. 4 * 5 * This software is available to you under a choice of one of two 6 * licenses. You may choose to be licensed under the terms of the GNU 7 * General Public License (GPL) Version 2, available from the file 8 * COPYING in the main directory of this source tree, or the 9 * OpenIB.org BSD license below: 10 * 11 * Redistribution and use in source and binary forms, with or 12 * without modification, are permitted provided that the following 13 * conditions are met: 14 * 15 * - Redistributions of source code must retain the above 16 * copyright notice, this list of conditions and the following 17 * disclaimer. 18 * 19 * - Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials 22 * provided with the distribution. 23 * 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * SOFTWARE. 32 */ 33 34 #include <linux/mlx4/cq.h> 35 #include <linux/mlx4/qp.h> 36 #include <linux/mlx4/srq.h> 37 #include <linux/slab.h> 38 39 #include "mlx4_ib.h" 40 #include <rdma/mlx4-abi.h> 41 #include <rdma/uverbs_ioctl.h> 42 43 static void mlx4_ib_cq_comp(struct mlx4_cq *cq) 44 { 45 struct ib_cq *ibcq = &to_mibcq(cq)->ibcq; 46 ibcq->comp_handler(ibcq, ibcq->cq_context); 47 } 48 49 static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type) 50 { 51 struct ib_event event; 52 struct ib_cq *ibcq; 53 54 if (type != MLX4_EVENT_TYPE_CQ_ERROR) { 55 pr_warn("Unexpected event type %d " 56 "on CQ %06x\n", type, cq->cqn); 57 return; 58 } 59 60 ibcq = &to_mibcq(cq)->ibcq; 61 if (ibcq->event_handler) { 62 event.device = ibcq->device; 63 event.event = IB_EVENT_CQ_ERR; 64 event.element.cq = ibcq; 65 ibcq->event_handler(&event, ibcq->cq_context); 66 } 67 } 68 69 static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) 70 { 71 return mlx4_buf_offset(&buf->buf, n * buf->entry_size); 72 } 73 74 static void *get_cqe(struct mlx4_ib_cq *cq, int n) 75 { 76 return get_cqe_from_buf(&cq->buf, n); 77 } 78 79 static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) 80 { 81 struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); 82 struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe); 83 84 return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ 85 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; 86 } 87 88 static struct mlx4_cqe *next_cqe_sw(struct mlx4_ib_cq *cq) 89 { 90 return get_sw_cqe(cq, cq->mcq.cons_index); 91 } 92 93 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) 94 { 95 struct mlx4_ib_cq *mcq = to_mcq(cq); 96 struct mlx4_ib_dev *dev = to_mdev(cq->device); 97 98 return mlx4_cq_modify(dev->dev, &mcq->mcq, cq_count, cq_period); 99 } 100 101 static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int nent) 102 { 103 int err; 104 105 err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, 106 PAGE_SIZE * 2, &buf->buf); 107 108 if (err) 109 goto out; 110 111 buf->entry_size = dev->dev->caps.cqe_size; 112 err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, 113 &buf->mtt); 114 if (err) 115 goto err_buf; 116 117 err = mlx4_buf_write_mtt(dev->dev, &buf->mtt, &buf->buf); 118 if (err) 119 goto err_mtt; 120 121 return 0; 122 123 err_mtt: 124 mlx4_mtt_cleanup(dev->dev, &buf->mtt); 125 126 err_buf: 127 mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf); 128 129 out: 130 return err; 131 } 132 133 static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) 134 { 135 mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); 136 } 137 138 #define CQ_CREATE_FLAGS_SUPPORTED IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION 139 int mlx4_ib_create_user_cq(struct ib_cq *ibcq, 140 const struct ib_cq_init_attr *attr, 141 struct uverbs_attr_bundle *attrs) 142 { 143 struct ib_udata *udata = &attrs->driver_udata; 144 struct ib_device *ibdev = ibcq->device; 145 int entries = attr->cqe; 146 int vector = attr->comp_vector; 147 struct mlx4_ib_dev *dev = to_mdev(ibdev); 148 struct mlx4_ib_cq *cq = to_mcq(ibcq); 149 struct mlx4_ib_create_cq ucmd; 150 int cqe_size = dev->dev->caps.cqe_size; 151 void *buf_addr; 152 int shift; 153 int n; 154 int err; 155 struct mlx4_ib_ucontext *context = rdma_udata_to_drv_context( 156 udata, struct mlx4_ib_ucontext, ibucontext); 157 158 if (attr->cqe > dev->dev->caps.max_cqes) 159 return -EINVAL; 160 161 if (attr->flags & ~CQ_CREATE_FLAGS_SUPPORTED) 162 return -EINVAL; 163 164 entries = roundup_pow_of_two(entries + 1); 165 cq->ibcq.cqe = entries - 1; 166 mutex_init(&cq->resize_mutex); 167 spin_lock_init(&cq->lock); 168 INIT_LIST_HEAD(&cq->send_qp_list); 169 INIT_LIST_HEAD(&cq->recv_qp_list); 170 171 err = ib_copy_validate_udata_in(udata, ucmd, db_addr); 172 if (err) 173 goto err_cq; 174 175 if (ibcq->umem && 176 (dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SW_CQ_INIT)) 177 return -EOPNOTSUPP; 178 179 buf_addr = (void *)(unsigned long)ucmd.buf_addr; 180 181 if (!ibcq->umem) 182 ibcq->umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr, 183 entries * cqe_size, 184 IB_ACCESS_LOCAL_WRITE); 185 if (IS_ERR(ibcq->umem)) { 186 err = PTR_ERR(ibcq->umem); 187 goto err_cq; 188 } 189 190 shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->ibcq.umem, 0, &n); 191 if (shift < 0) { 192 err = shift; 193 goto err_cq; 194 } 195 196 err = mlx4_mtt_init(dev->dev, n, shift, &cq->buf.mtt); 197 if (err) 198 goto err_cq; 199 200 err = mlx4_ib_umem_write_mtt(dev, &cq->buf.mtt, cq->ibcq.umem); 201 if (err) 202 goto err_mtt; 203 204 err = mlx4_ib_db_map_user(udata, ucmd.db_addr, &cq->db); 205 if (err) 206 goto err_mtt; 207 208 if (dev->eq_table) 209 vector = dev->eq_table[vector % ibdev->num_comp_vectors]; 210 211 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, &context->uar, 212 cq->db.dma, &cq->mcq, vector, 0, 213 attr->flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION, 214 buf_addr, true); 215 if (err) 216 goto err_dbmap; 217 218 cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp; 219 cq->mcq.event = mlx4_ib_cq_event; 220 cq->mcq.usage = MLX4_RES_USAGE_USER_VERBS; 221 222 if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) { 223 err = -EFAULT; 224 goto err_cq_free; 225 } 226 227 return 0; 228 229 err_cq_free: 230 mlx4_cq_free(dev->dev, &cq->mcq); 231 232 err_dbmap: 233 mlx4_ib_db_unmap_user(context, &cq->db); 234 235 err_mtt: 236 mlx4_mtt_cleanup(dev->dev, &cq->buf.mtt); 237 /* UMEM is released by ib_core */ 238 239 err_cq: 240 return err; 241 } 242 243 int mlx4_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, 244 struct uverbs_attr_bundle *attrs) 245 { 246 struct ib_device *ibdev = ibcq->device; 247 int entries = attr->cqe; 248 int vector = attr->comp_vector; 249 struct mlx4_ib_dev *dev = to_mdev(ibdev); 250 struct mlx4_ib_cq *cq = to_mcq(ibcq); 251 void *buf_addr; 252 int err; 253 254 if (attr->cqe > dev->dev->caps.max_cqes) 255 return -EINVAL; 256 257 entries = roundup_pow_of_two(entries + 1); 258 cq->ibcq.cqe = entries - 1; 259 mutex_init(&cq->resize_mutex); 260 spin_lock_init(&cq->lock); 261 INIT_LIST_HEAD(&cq->send_qp_list); 262 INIT_LIST_HEAD(&cq->recv_qp_list); 263 264 err = mlx4_db_alloc(dev->dev, &cq->db, 1); 265 if (err) 266 return err; 267 268 cq->mcq.set_ci_db = cq->db.db; 269 cq->mcq.arm_db = cq->db.db + 1; 270 *cq->mcq.set_ci_db = 0; 271 *cq->mcq.arm_db = 0; 272 273 err = mlx4_ib_alloc_cq_buf(dev, &cq->buf, entries); 274 if (err) 275 goto err_db; 276 277 buf_addr = &cq->buf.buf; 278 279 if (dev->eq_table) 280 vector = dev->eq_table[vector % ibdev->num_comp_vectors]; 281 282 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, &dev->priv_uar, 283 cq->db.dma, &cq->mcq, vector, 0, 0, 284 buf_addr, false); 285 if (err) 286 goto err_buf; 287 288 cq->mcq.comp = mlx4_ib_cq_comp; 289 cq->mcq.event = mlx4_ib_cq_event; 290 cq->mcq.usage = MLX4_RES_USAGE_DRIVER; 291 292 return 0; 293 294 err_buf: 295 mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); 296 297 err_db: 298 mlx4_db_free(dev->dev, &cq->db); 299 return err; 300 } 301 302 static int mlx4_alloc_resize_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, 303 int entries) 304 { 305 int err; 306 307 if (cq->resize_buf) 308 return -EBUSY; 309 310 cq->resize_buf = kmalloc_obj(*cq->resize_buf); 311 if (!cq->resize_buf) 312 return -ENOMEM; 313 314 err = mlx4_ib_alloc_cq_buf(dev, &cq->resize_buf->buf, entries); 315 if (err) { 316 kfree(cq->resize_buf); 317 cq->resize_buf = NULL; 318 return err; 319 } 320 321 cq->resize_buf->cqe = entries - 1; 322 323 return 0; 324 } 325 326 static int mlx4_alloc_resize_umem(struct mlx4_ib_dev *dev, struct mlx4_ib_cq *cq, 327 int entries, struct ib_udata *udata) 328 { 329 struct mlx4_ib_resize_cq ucmd; 330 int cqe_size = dev->dev->caps.cqe_size; 331 int shift; 332 int n; 333 int err; 334 335 if (cq->resize_umem) 336 return -EBUSY; 337 338 err = ib_copy_validate_udata_in(udata, ucmd, buf_addr); 339 if (err) 340 return err; 341 342 cq->resize_buf = kmalloc_obj(*cq->resize_buf); 343 if (!cq->resize_buf) 344 return -ENOMEM; 345 346 cq->resize_umem = ib_umem_get(&dev->ib_dev, ucmd.buf_addr, 347 entries * cqe_size, 348 IB_ACCESS_LOCAL_WRITE); 349 if (IS_ERR(cq->resize_umem)) { 350 err = PTR_ERR(cq->resize_umem); 351 goto err_buf; 352 } 353 354 shift = mlx4_ib_umem_calc_optimal_mtt_size(cq->resize_umem, 0, &n); 355 if (shift < 0) { 356 err = shift; 357 goto err_umem; 358 } 359 360 err = mlx4_mtt_init(dev->dev, n, shift, &cq->resize_buf->buf.mtt); 361 if (err) 362 goto err_umem; 363 364 err = mlx4_ib_umem_write_mtt(dev, &cq->resize_buf->buf.mtt, 365 cq->resize_umem); 366 if (err) 367 goto err_mtt; 368 369 cq->resize_buf->cqe = entries - 1; 370 371 return 0; 372 373 err_mtt: 374 mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt); 375 376 err_umem: 377 ib_umem_release(cq->resize_umem); 378 379 err_buf: 380 kfree(cq->resize_buf); 381 cq->resize_buf = NULL; 382 return err; 383 } 384 385 static int mlx4_ib_get_outstanding_cqes(struct mlx4_ib_cq *cq) 386 { 387 u32 i; 388 389 i = cq->mcq.cons_index; 390 while (get_sw_cqe(cq, i)) 391 ++i; 392 393 return i - cq->mcq.cons_index; 394 } 395 396 static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq) 397 { 398 struct mlx4_cqe *cqe, *new_cqe; 399 int i; 400 int cqe_size = cq->buf.entry_size; 401 int cqe_inc = cqe_size == 64 ? 1 : 0; 402 403 i = cq->mcq.cons_index; 404 cqe = get_cqe(cq, i & cq->ibcq.cqe); 405 cqe += cqe_inc; 406 407 while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { 408 new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, 409 (i + 1) & cq->resize_buf->cqe); 410 memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size); 411 new_cqe += cqe_inc; 412 413 new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | 414 (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); 415 cqe = get_cqe(cq, ++i & cq->ibcq.cqe); 416 cqe += cqe_inc; 417 } 418 ++cq->mcq.cons_index; 419 } 420 421 int mlx4_ib_resize_cq(struct ib_cq *ibcq, unsigned int entries, 422 struct ib_udata *udata) 423 { 424 struct mlx4_ib_dev *dev = to_mdev(ibcq->device); 425 struct mlx4_ib_cq *cq = to_mcq(ibcq); 426 struct mlx4_mtt mtt; 427 int outst_cqe; 428 int err; 429 430 mutex_lock(&cq->resize_mutex); 431 if (entries > dev->dev->caps.max_cqes) { 432 err = -EINVAL; 433 goto out; 434 } 435 436 entries = roundup_pow_of_two(entries + 1); 437 if (entries == ibcq->cqe + 1) { 438 err = 0; 439 goto out; 440 } 441 442 if (entries > dev->dev->caps.max_cqes + 1) { 443 err = -EINVAL; 444 goto out; 445 } 446 447 if (ibcq->uobject) { 448 err = mlx4_alloc_resize_umem(dev, cq, entries, udata); 449 if (err) 450 goto out; 451 } else { 452 /* Can't be smaller than the number of outstanding CQEs */ 453 outst_cqe = mlx4_ib_get_outstanding_cqes(cq); 454 if (entries < outst_cqe + 1) { 455 err = -EINVAL; 456 goto out; 457 } 458 459 err = mlx4_alloc_resize_buf(dev, cq, entries); 460 if (err) 461 goto out; 462 } 463 464 mtt = cq->buf.mtt; 465 466 err = mlx4_cq_resize(dev->dev, &cq->mcq, entries, &cq->resize_buf->buf.mtt); 467 if (err) 468 goto err_buf; 469 470 mlx4_mtt_cleanup(dev->dev, &mtt); 471 if (ibcq->uobject) { 472 cq->buf = cq->resize_buf->buf; 473 cq->ibcq.cqe = cq->resize_buf->cqe; 474 ib_umem_release(cq->ibcq.umem); 475 cq->ibcq.umem = cq->resize_umem; 476 477 kfree(cq->resize_buf); 478 cq->resize_buf = NULL; 479 cq->resize_umem = NULL; 480 } else { 481 struct mlx4_ib_cq_buf tmp_buf; 482 int tmp_cqe = 0; 483 484 spin_lock_irq(&cq->lock); 485 if (cq->resize_buf) { 486 mlx4_ib_cq_resize_copy_cqes(cq); 487 tmp_buf = cq->buf; 488 tmp_cqe = cq->ibcq.cqe; 489 cq->buf = cq->resize_buf->buf; 490 cq->ibcq.cqe = cq->resize_buf->cqe; 491 492 kfree(cq->resize_buf); 493 cq->resize_buf = NULL; 494 } 495 spin_unlock_irq(&cq->lock); 496 497 if (tmp_cqe) 498 mlx4_ib_free_cq_buf(dev, &tmp_buf, tmp_cqe); 499 } 500 501 goto out; 502 503 err_buf: 504 mlx4_mtt_cleanup(dev->dev, &cq->resize_buf->buf.mtt); 505 if (!ibcq->uobject) 506 mlx4_ib_free_cq_buf(dev, &cq->resize_buf->buf, 507 cq->resize_buf->cqe); 508 509 kfree(cq->resize_buf); 510 cq->resize_buf = NULL; 511 512 ib_umem_release(cq->resize_umem); 513 cq->resize_umem = NULL; 514 out: 515 mutex_unlock(&cq->resize_mutex); 516 517 return err; 518 } 519 520 int mlx4_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata) 521 { 522 struct mlx4_ib_dev *dev = to_mdev(cq->device); 523 struct mlx4_ib_cq *mcq = to_mcq(cq); 524 525 mlx4_cq_free(dev->dev, &mcq->mcq); 526 mlx4_mtt_cleanup(dev->dev, &mcq->buf.mtt); 527 528 if (udata) { 529 mlx4_ib_db_unmap_user( 530 rdma_udata_to_drv_context( 531 udata, 532 struct mlx4_ib_ucontext, 533 ibucontext), 534 &mcq->db); 535 /* UMEM is released by ib_core */ 536 } else { 537 mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe); 538 mlx4_db_free(dev->dev, &mcq->db); 539 } 540 return 0; 541 } 542 543 static void dump_cqe(void *cqe) 544 { 545 __be32 *buf = cqe; 546 547 pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", 548 be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]), 549 be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]), 550 be32_to_cpu(buf[6]), be32_to_cpu(buf[7])); 551 } 552 553 static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe, 554 struct ib_wc *wc) 555 { 556 if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) { 557 pr_debug("local QP operation err " 558 "(QPN %06x, WQE index %x, vendor syndrome %02x, " 559 "opcode = %02x)\n", 560 be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index), 561 cqe->vendor_err_syndrome, 562 cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); 563 dump_cqe(cqe); 564 } 565 566 switch (cqe->syndrome) { 567 case MLX4_CQE_SYNDROME_LOCAL_LENGTH_ERR: 568 wc->status = IB_WC_LOC_LEN_ERR; 569 break; 570 case MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR: 571 wc->status = IB_WC_LOC_QP_OP_ERR; 572 break; 573 case MLX4_CQE_SYNDROME_LOCAL_PROT_ERR: 574 wc->status = IB_WC_LOC_PROT_ERR; 575 break; 576 case MLX4_CQE_SYNDROME_WR_FLUSH_ERR: 577 wc->status = IB_WC_WR_FLUSH_ERR; 578 break; 579 case MLX4_CQE_SYNDROME_MW_BIND_ERR: 580 wc->status = IB_WC_MW_BIND_ERR; 581 break; 582 case MLX4_CQE_SYNDROME_BAD_RESP_ERR: 583 wc->status = IB_WC_BAD_RESP_ERR; 584 break; 585 case MLX4_CQE_SYNDROME_LOCAL_ACCESS_ERR: 586 wc->status = IB_WC_LOC_ACCESS_ERR; 587 break; 588 case MLX4_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: 589 wc->status = IB_WC_REM_INV_REQ_ERR; 590 break; 591 case MLX4_CQE_SYNDROME_REMOTE_ACCESS_ERR: 592 wc->status = IB_WC_REM_ACCESS_ERR; 593 break; 594 case MLX4_CQE_SYNDROME_REMOTE_OP_ERR: 595 wc->status = IB_WC_REM_OP_ERR; 596 break; 597 case MLX4_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: 598 wc->status = IB_WC_RETRY_EXC_ERR; 599 break; 600 case MLX4_CQE_SYNDROME_RNR_RETRY_EXC_ERR: 601 wc->status = IB_WC_RNR_RETRY_EXC_ERR; 602 break; 603 case MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR: 604 wc->status = IB_WC_REM_ABORT_ERR; 605 break; 606 default: 607 wc->status = IB_WC_GENERAL_ERR; 608 break; 609 } 610 611 wc->vendor_err = cqe->vendor_err_syndrome; 612 } 613 614 static int mlx4_ib_ipoib_csum_ok(__be16 status, u8 badfcs_enc, __be16 checksum) 615 { 616 return ((badfcs_enc & MLX4_CQE_STATUS_L4_CSUM) || 617 ((status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && 618 (status & cpu_to_be16(MLX4_CQE_STATUS_TCP | 619 MLX4_CQE_STATUS_UDP)) && 620 (checksum == cpu_to_be16(0xffff)))); 621 } 622 623 static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, 624 unsigned tail, struct mlx4_cqe *cqe, int is_eth) 625 { 626 struct mlx4_ib_proxy_sqp_hdr *hdr; 627 628 ib_dma_sync_single_for_cpu(qp->ibqp.device, 629 qp->sqp_proxy_rcv[tail].map, 630 sizeof (struct mlx4_ib_proxy_sqp_hdr), 631 DMA_FROM_DEVICE); 632 hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); 633 wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index); 634 wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; 635 wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0; 636 wc->dlid_path_bits = 0; 637 638 if (is_eth) { 639 wc->slid = 0; 640 wc->vlan_id = be16_to_cpu(hdr->tun.sl_vid); 641 memcpy(&(wc->smac[0]), (char *)&hdr->tun.mac_31_0, 4); 642 memcpy(&(wc->smac[4]), (char *)&hdr->tun.slid_mac_47_32, 2); 643 wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); 644 } else { 645 wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); 646 wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); 647 } 648 } 649 650 static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, 651 struct ib_wc *wc, int *npolled, int is_send) 652 { 653 struct mlx4_ib_wq *wq; 654 unsigned cur; 655 int i; 656 657 wq = is_send ? &qp->sq : &qp->rq; 658 cur = wq->head - wq->tail; 659 660 if (cur == 0) 661 return; 662 663 for (i = 0; i < cur && *npolled < num_entries; i++) { 664 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; 665 wc->status = IB_WC_WR_FLUSH_ERR; 666 wc->vendor_err = MLX4_CQE_SYNDROME_WR_FLUSH_ERR; 667 wq->tail++; 668 (*npolled)++; 669 wc->qp = &qp->ibqp; 670 wc++; 671 } 672 } 673 674 static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries, 675 struct ib_wc *wc, int *npolled) 676 { 677 struct mlx4_ib_qp *qp; 678 679 *npolled = 0; 680 /* Find uncompleted WQEs belonging to that cq and return 681 * simulated FLUSH_ERR completions 682 */ 683 list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) { 684 mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 1); 685 if (*npolled >= num_entries) 686 goto out; 687 } 688 689 list_for_each_entry(qp, &cq->recv_qp_list, cq_recv_list) { 690 mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 0); 691 if (*npolled >= num_entries) 692 goto out; 693 } 694 695 out: 696 return; 697 } 698 699 static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, 700 struct mlx4_ib_qp **cur_qp, 701 struct ib_wc *wc) 702 { 703 struct mlx4_cqe *cqe; 704 struct mlx4_qp *mqp; 705 struct mlx4_ib_wq *wq; 706 struct mlx4_ib_srq *srq; 707 struct mlx4_srq *msrq = NULL; 708 int is_send; 709 int is_error; 710 int is_eth; 711 u32 g_mlpath_rqpn; 712 u16 wqe_ctr; 713 unsigned tail = 0; 714 715 repoll: 716 cqe = next_cqe_sw(cq); 717 if (!cqe) 718 return -EAGAIN; 719 720 if (cq->buf.entry_size == 64) 721 cqe++; 722 723 ++cq->mcq.cons_index; 724 725 /* 726 * Make sure we read CQ entry contents after we've checked the 727 * ownership bit. 728 */ 729 rmb(); 730 731 is_send = cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK; 732 is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == 733 MLX4_CQE_OPCODE_ERROR; 734 735 /* Resize CQ in progress */ 736 if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { 737 if (cq->resize_buf) { 738 struct mlx4_ib_dev *dev = to_mdev(cq->ibcq.device); 739 740 mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe); 741 cq->buf = cq->resize_buf->buf; 742 cq->ibcq.cqe = cq->resize_buf->cqe; 743 744 kfree(cq->resize_buf); 745 cq->resize_buf = NULL; 746 } 747 748 goto repoll; 749 } 750 751 if (!*cur_qp || 752 (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) != (*cur_qp)->mqp.qpn) { 753 /* 754 * We do not have to take the QP table lock here, 755 * because CQs will be locked while QPs are removed 756 * from the table. 757 */ 758 mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, 759 be32_to_cpu(cqe->vlan_my_qpn)); 760 *cur_qp = to_mibqp(mqp); 761 } 762 763 wc->qp = &(*cur_qp)->ibqp; 764 765 if (wc->qp->qp_type == IB_QPT_XRC_TGT) { 766 u32 srq_num; 767 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); 768 srq_num = g_mlpath_rqpn & 0xffffff; 769 /* SRQ is also in the radix tree */ 770 msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, 771 srq_num); 772 } 773 774 if (is_send) { 775 wq = &(*cur_qp)->sq; 776 if (!(*cur_qp)->sq_signal_bits) { 777 wqe_ctr = be16_to_cpu(cqe->wqe_index); 778 wq->tail += (u16) (wqe_ctr - (u16) wq->tail); 779 } 780 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; 781 ++wq->tail; 782 } else if ((*cur_qp)->ibqp.srq) { 783 srq = to_msrq((*cur_qp)->ibqp.srq); 784 wqe_ctr = be16_to_cpu(cqe->wqe_index); 785 wc->wr_id = srq->wrid[wqe_ctr]; 786 mlx4_ib_free_srq_wqe(srq, wqe_ctr); 787 } else if (msrq) { 788 srq = to_mibsrq(msrq); 789 wqe_ctr = be16_to_cpu(cqe->wqe_index); 790 wc->wr_id = srq->wrid[wqe_ctr]; 791 mlx4_ib_free_srq_wqe(srq, wqe_ctr); 792 } else { 793 wq = &(*cur_qp)->rq; 794 tail = wq->tail & (wq->wqe_cnt - 1); 795 wc->wr_id = wq->wrid[tail]; 796 ++wq->tail; 797 } 798 799 if (unlikely(is_error)) { 800 mlx4_ib_handle_error_cqe((struct mlx4_err_cqe *) cqe, wc); 801 return 0; 802 } 803 804 wc->status = IB_WC_SUCCESS; 805 806 if (is_send) { 807 wc->wc_flags = 0; 808 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { 809 case MLX4_OPCODE_RDMA_WRITE_IMM: 810 wc->wc_flags |= IB_WC_WITH_IMM; 811 fallthrough; 812 case MLX4_OPCODE_RDMA_WRITE: 813 wc->opcode = IB_WC_RDMA_WRITE; 814 break; 815 case MLX4_OPCODE_SEND_IMM: 816 wc->wc_flags |= IB_WC_WITH_IMM; 817 fallthrough; 818 case MLX4_OPCODE_SEND: 819 case MLX4_OPCODE_SEND_INVAL: 820 wc->opcode = IB_WC_SEND; 821 break; 822 case MLX4_OPCODE_RDMA_READ: 823 wc->opcode = IB_WC_RDMA_READ; 824 wc->byte_len = be32_to_cpu(cqe->byte_cnt); 825 break; 826 case MLX4_OPCODE_ATOMIC_CS: 827 wc->opcode = IB_WC_COMP_SWAP; 828 wc->byte_len = 8; 829 break; 830 case MLX4_OPCODE_ATOMIC_FA: 831 wc->opcode = IB_WC_FETCH_ADD; 832 wc->byte_len = 8; 833 break; 834 case MLX4_OPCODE_MASKED_ATOMIC_CS: 835 wc->opcode = IB_WC_MASKED_COMP_SWAP; 836 wc->byte_len = 8; 837 break; 838 case MLX4_OPCODE_MASKED_ATOMIC_FA: 839 wc->opcode = IB_WC_MASKED_FETCH_ADD; 840 wc->byte_len = 8; 841 break; 842 case MLX4_OPCODE_LSO: 843 wc->opcode = IB_WC_LSO; 844 break; 845 case MLX4_OPCODE_FMR: 846 wc->opcode = IB_WC_REG_MR; 847 break; 848 case MLX4_OPCODE_LOCAL_INVAL: 849 wc->opcode = IB_WC_LOCAL_INV; 850 break; 851 } 852 } else { 853 wc->byte_len = be32_to_cpu(cqe->byte_cnt); 854 855 switch (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) { 856 case MLX4_RECV_OPCODE_RDMA_WRITE_IMM: 857 wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; 858 wc->wc_flags = IB_WC_WITH_IMM; 859 wc->ex.imm_data = cqe->immed_rss_invalid; 860 break; 861 case MLX4_RECV_OPCODE_SEND_INVAL: 862 wc->opcode = IB_WC_RECV; 863 wc->wc_flags = IB_WC_WITH_INVALIDATE; 864 wc->ex.invalidate_rkey = be32_to_cpu(cqe->immed_rss_invalid); 865 break; 866 case MLX4_RECV_OPCODE_SEND: 867 wc->opcode = IB_WC_RECV; 868 wc->wc_flags = 0; 869 break; 870 case MLX4_RECV_OPCODE_SEND_IMM: 871 wc->opcode = IB_WC_RECV; 872 wc->wc_flags = IB_WC_WITH_IMM; 873 wc->ex.imm_data = cqe->immed_rss_invalid; 874 break; 875 } 876 877 is_eth = (rdma_port_get_link_layer(wc->qp->device, 878 (*cur_qp)->port) == 879 IB_LINK_LAYER_ETHERNET); 880 if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { 881 if ((*cur_qp)->mlx4_ib_qp_type & 882 (MLX4_IB_QPT_PROXY_SMI_OWNER | 883 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { 884 use_tunnel_data(*cur_qp, cq, wc, tail, cqe, 885 is_eth); 886 return 0; 887 } 888 } 889 890 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); 891 wc->src_qp = g_mlpath_rqpn & 0xffffff; 892 wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; 893 wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; 894 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; 895 wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, 896 cqe->badfcs_enc, 897 cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; 898 if (is_eth) { 899 wc->slid = 0; 900 wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; 901 if (be32_to_cpu(cqe->vlan_my_qpn) & 902 MLX4_CQE_CVLAN_PRESENT_MASK) { 903 wc->vlan_id = be16_to_cpu(cqe->sl_vid) & 904 MLX4_CQE_VID_MASK; 905 } else { 906 wc->vlan_id = 0xffff; 907 } 908 memcpy(wc->smac, cqe->smac, ETH_ALEN); 909 wc->wc_flags |= (IB_WC_WITH_VLAN | IB_WC_WITH_SMAC); 910 } else { 911 wc->slid = be16_to_cpu(cqe->rlid); 912 wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; 913 wc->vlan_id = 0xffff; 914 } 915 } 916 917 return 0; 918 } 919 920 int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) 921 { 922 struct mlx4_ib_cq *cq = to_mcq(ibcq); 923 struct mlx4_ib_qp *cur_qp = NULL; 924 unsigned long flags; 925 int npolled; 926 struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); 927 928 spin_lock_irqsave(&cq->lock, flags); 929 if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { 930 mlx4_ib_poll_sw_comp(cq, num_entries, wc, &npolled); 931 goto out; 932 } 933 934 for (npolled = 0; npolled < num_entries; ++npolled) { 935 if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled)) 936 break; 937 } 938 939 mlx4_cq_set_ci(&cq->mcq); 940 941 out: 942 spin_unlock_irqrestore(&cq->lock, flags); 943 944 return npolled; 945 } 946 947 int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) 948 { 949 mlx4_cq_arm(&to_mcq(ibcq)->mcq, 950 (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ? 951 MLX4_CQ_DB_REQ_NOT_SOL : MLX4_CQ_DB_REQ_NOT, 952 to_mdev(ibcq->device)->uar_map, 953 MLX4_GET_DOORBELL_LOCK(&to_mdev(ibcq->device)->uar_lock)); 954 955 return 0; 956 } 957 958 void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) 959 { 960 u32 prod_index; 961 int nfreed = 0; 962 struct mlx4_cqe *cqe, *dest; 963 u8 owner_bit; 964 int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0; 965 966 /* 967 * First we need to find the current producer index, so we 968 * know where to start cleaning from. It doesn't matter if HW 969 * adds new entries after this loop -- the QP we're worried 970 * about is already in RESET, so the new entries won't come 971 * from our QP and therefore don't need to be checked. 972 */ 973 for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); ++prod_index) 974 if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe) 975 break; 976 977 /* 978 * Now sweep backwards through the CQ, removing CQ entries 979 * that match our QP by copying older entries on top of them. 980 */ 981 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { 982 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); 983 cqe += cqe_inc; 984 985 if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { 986 if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) 987 mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); 988 ++nfreed; 989 } else if (nfreed) { 990 dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); 991 dest += cqe_inc; 992 993 owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; 994 memcpy(dest, cqe, sizeof *cqe); 995 dest->owner_sr_opcode = owner_bit | 996 (dest->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK); 997 } 998 } 999 1000 if (nfreed) { 1001 cq->mcq.cons_index += nfreed; 1002 /* 1003 * Make sure update of buffer contents is done before 1004 * updating consumer index. 1005 */ 1006 wmb(); 1007 mlx4_cq_set_ci(&cq->mcq); 1008 } 1009 } 1010 1011 void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq) 1012 { 1013 spin_lock_irq(&cq->lock); 1014 __mlx4_ib_cq_clean(cq, qpn, srq); 1015 spin_unlock_irq(&cq->lock); 1016 } 1017