/*-
 * Copyright (c) 2013-2020, Mellanox Technologies. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_rss.h"
#include "opt_ratelimit.h"

#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include <rdma/uverbs_ioctl.h>
#include <dev/mlx5/mlx5_ib/mlx5_ib.h>

static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe __unused)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}

static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, int type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device = &dev->ib_dev;
		event.event = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}

static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
	return mlx5_buf_offset(&buf->buf, n * size);
}

static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}

static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
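	/*
	 * A CQE belongs to software when its opcode is valid and its
	 * ownership bit matches the parity of the current pass over the
	 * ring; the ring holds cq->ibcq.cqe + 1 entries, a power of two.
	 */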
	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}

static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_REG_MR:
		return IB_WC_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}

static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* FALLTHROUGH */
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode = IB_WC_RDMA_READ;
		wc->byte_len = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode = IB_WC_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode = IB_WC_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}

enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE = 2,
};

static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 roce_packet_type;
	bool vlan_present;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_core_get_srq(dev->mdev,
						 be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq && atomic_dec_and_test(&msrq->refcount))
				complete(&msrq->free);
		}
	} else {
		wq = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (cqe->op_own >> 4) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_IP_CSUM_OK;
		if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
			       (cqe->hds_ip_ext & CQE_L4_OK))))
			wc->wc_flags = 0;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode = IB_WC_RECV;
		wc->wc_flags = IB_WC_WITH_INVALIDATE;
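		/* Report the rkey invalidated by the remote send-with-invalidate. */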
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
		break;
	}
	wc->src_qp = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	if (unlikely(is_qp1(qp->ibqp.qp_type))) {
		u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;

		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
				    &wc->pkey_index);
	} else {
		wc->pkey_index = 0;
	}

	if (ll != IB_LINK_LAYER_ETHERNET) {
		wc->slid = be16_to_cpu(cqe->slid);
		wc->sl = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
		return;
	}

	wc->slid = 0;
	vlan_present = cqe_has_vlan(cqe);
	roce_packet_type = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0x3;
	if (vlan_present) {
		wc->vlan_id = (be16_to_cpu(cqe->vlan_info)) & 0xfff;
		wc->sl = (be16_to_cpu(cqe->vlan_info) >> 13) & 0x7;
		wc->wc_flags |= IB_WC_WITH_VLAN;
	} else {
		wc->sl = 0;
	}

	switch (roce_packet_type) {
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
		wc->network_hdr_type = RDMA_NETWORK_IB;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
		wc->network_hdr_type = RDMA_NETWORK_IPV6;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
		wc->network_hdr_type = RDMA_NETWORK_IPV4;
		break;
	}
	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}

static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	__be32 *p = (__be32 *)cqe;
	int i;

	mlx5_ib_warn(dev, "dump error cqe\n");
	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
			be32_to_cpu(p[3]));
}

static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump)
		dump_cqe(dev, cqe);
}

static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
{
	/* TBD: waiting decision
	 */
	return 0;
}
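
/*
 * Locate the local data segment of an atomic WQE: it follows the control,
 * remote-address and atomic segments at the start of the send WQE.
 */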
static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
{
	struct mlx5_wqe_data_seg *dpseg;
	void *addr;

	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
		sizeof(struct mlx5_wqe_raddr_seg) +
		sizeof(struct mlx5_wqe_atomic_seg);
	addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);
	return addr;
}

static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			  uint16_t idx)
{
	void *addr;
	int byte_count;
	int i;

	if (!is_atomic_response(qp, idx))
		return;

	byte_count = be32_to_cpu(cqe64->byte_cnt);
	addr = mlx5_get_atomic_laddr(qp, idx);

	if (byte_count == 4) {
		*(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
	} else {
		for (i = 0; i < byte_count; i += 8) {
			*(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
			addr += 8;
		}
	}

	return;
}

static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		handle_atomic(qp, cqe64, idx);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}

static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_buf_free(dev->mdev, &buf->buf);
}

static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}

static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	unsigned int idx;
	int np;
	int i;

	wq = &qp->sq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0; i < cur && np < num_entries; i++) {
		idx = wq->last_poll & (wq->wqe_cnt - 1);
		wc->wr_id = wq->wrid[idx];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
		wq->last_poll = wq->w_list[idx].next;
	}
	*npolled = np;
}
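
/*
 * Emit flush-error completions for receive WQEs that are still outstanding.
 * Together with sw_send_comp() this lets the CQ be drained in software when
 * the device is in internal error and will no longer produce hardware CQEs.
 */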
static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	int np;
	int i;

	wq = &qp->rq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0; i < cur && np < num_entries; i++) {
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
	}
	*npolled = np;
}

static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_qp *qp;

	*npolled = 0;
	/* Find uncompleted WQEs belonging to that cq and return mimicked
	 * flush-error completions for them.
	 */
	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
		sw_send_comp(qp, num_entries, wc + *npolled, npolled);
		if (*npolled >= num_entries)
			return;
	}

	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
		sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
		if (*npolled >= num_entries)
			return;
	}
}

static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	struct mlx5_sig_err_cqe *sig_err_cqe;
	struct mlx5_core_mkey *mmkey;
	struct mlx5_ib_mr *mr;
	unsigned long flags;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = cqe64->op_own >> 4;
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx5_qp_lookup(dev->mdev, qpn);
		*cur_qp = to_mibqp(mqp);
	}

	wc->qp = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
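	/*
	 * For error CQEs, map the hardware syndrome to an ib_wc status and
	 * release the WQE: requester errors locate the WR through the WQE
	 * counter on the send queue; responder errors free the SRQ WQE or
	 * advance the receive queue tail.
	 */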
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR:
		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;

		spin_lock_irqsave(&dev->mdev->priv.mr_table.lock, flags);
		mmkey = __mlx5_mr_lookup(dev->mdev,
					 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		mr = to_mibmr(mmkey);
		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
		mr->sig->sig_err_exists = true;
		mr->sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, mr->sig->err_item.key,
			     mr->sig->err_item.err_type,
			     (long long)mr->sig->err_item.sig_err_offset,
			     mr->sig->err_item.expected,
			     mr->sig->err_item.actual);

		spin_unlock_irqrestore(&dev->mdev->priv.mr_table.lock, flags);
		goto repoll;
	}

	return 0;
}

static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
			struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_ib_wc *soft_wc, *next;
	int npolled = 0;

	list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
		if (npolled >= num_entries)
			break;

		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
			    cq->mcq.cqn);

		wc[npolled++] = soft_wc->wc;
		list_del(&soft_wc->list);
		kfree(soft_wc);
	}

	return npolled;
}

int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_core_dev *mdev = dev->mdev;
	unsigned long flags;
	int soft_polled = 0;
	int npolled;

	spin_lock_irqsave(&cq->lock, flags);
	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
		mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
		goto out;
	}

	if (unlikely(!list_empty(&cq->wc_list)))
		soft_polled = poll_soft_wc(cq, num_entries, wc);

	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
		if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled))
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);
out:
	spin_unlock_irqrestore(&cq->lock, flags);

	return soft_polled + npolled;
}

int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void __iomem *uar_page = mdev->priv.uar->map;
	unsigned long irq_flags;
	int ret = 0;

	if (unlikely(mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
		return -1;

	spin_lock_irqsave(&cq->lock, irq_flags);
	if (cq->notify_flags != IB_CQ_NEXT_COMP)
		cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;
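
	/* Report a missed event if software-generated completions are
	 * already pending on this CQ.
	 */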
	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
		ret = 1;
	spin_unlock_irqrestore(&cq->lock, irq_flags);

	mlx5_cq_arm(&cq->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    uar_page,
		    MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
		    cq->mcq.cons_index);

	return ret;
}

static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
			int nent, int cqe_size)
{
	int err;

	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size,
			     2 * PAGE_SIZE, &buf->buf);
	if (err)
		return err;

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}

static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct mlx5_ib_cq *cq, int entries, u32 **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd = {};
	size_t ucmdlen;
	int page_shift;
	__be64 *pas;
	int npages;
	int ncont;
	void *cqc;
	int err;
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	ucmdlen = min(udata->inlen, sizeof(ucmd));
	if (ucmdlen < offsetof(struct mlx5_ib_create_cq, flags))
		return -EINVAL;

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if ((ucmd.flags & ~(MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX)))
		return -EINVAL;

	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem = ib_umem_get(&context->ibucontext, ucmd.buf_addr,
				   entries * ucmd.cqe_size,
				   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	err = mlx5_ib_db_map_user(context, ucmd.db_addr,
				  &cq->db);
	if (err)
		goto err_umem;

	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift,
			   &ncont, NULL);
	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
		    (long long)ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	if (ucmd.flags & MLX5_IB_CREATE_CQ_FLAGS_UAR_PAGE_INDEX) {
		*index = ucmd.uar_page_index;
	} else if (context->bfregi.lib_uar_dyn) {
		err = -EINVAL;
		goto err_cqb;
	} else {
		*index = context->bfregi.sys_pages[0];
	}

	MLX5_SET(create_cq_in, *cqb, uid, context->devx_uid);
	return 0;

err_cqb:
	kvfree(*cqb);

err_db:
	mlx5_ib_db_unmap_user(context, &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}

static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
		udata, struct mlx5_ib_ucontext, ibucontext);

	mlx5_ib_db_unmap_user(context, &cq->db);
	ib_umem_release(cq->buf.umem);
}
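
/*
 * Mark every CQE in a freshly allocated kernel CQ buffer as invalid
 * (hardware owned) so the poller ignores the entries until the device
 * has written real completions into them.
 */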
static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}

static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    u32 **cqb, int *index, int *inlen)
{
	__be64 *pas;
	void *cqc;
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db = cq->db.db;
	cq->mcq.arm_db = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_buf(cq, &cq->buf);

	*inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
		 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas);
	mlx5_fill_page_array(&cq->buf.buf, pas);

	cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context);
	MLX5_SET(cqc, cqc, log_page_size,
		 cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	*index = dev->mdev->priv.uar->index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}

static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}

static void notify_soft_wc_handler(struct work_struct *work)
{
	struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
					     notify_work);

	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}

int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
		      struct ib_udata *udata)
{
	struct ib_device *ibdev = ibcq->device;
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	int uninitialized_var(index);
	int uninitialized_var(inlen);
	u32 *cqb = NULL;
	void *cqc;
	int cqe_size;
	unsigned int irqn;
	int eqn;
	int err;

	if (entries < 0 ||
	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
		return -EINVAL;

	if (check_cq_create_flags(attr->flags))
		return -EOPNOTSUPP;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
		return -EINVAL;

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;
	INIT_LIST_HEAD(&cq->list_send_qp);
	INIT_LIST_HEAD(&cq->list_recv_qp);

	if (udata) {
		err = create_cq_user(dev, udata, cq, entries, &cqb, &cqe_size,
				     &index, &inlen);
		if (err)
			return err;
	} else {
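		/* Kernel CQs match the CQE stride to the CPU cache line
		 * size: 128-byte CQEs on 128-byte cache lines, 64-byte
		 * CQEs otherwise.
		 */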
		cqe_size = cache_line_size() == 128 ? 128 : 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			return err;

		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
	}

	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
	if (err)
		goto err_cqb;

	cq->cqe_size = cqe_size;

	cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context);
	MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));
	MLX5_SET(cqc, cqc, uar_page, index);
	MLX5_SET(cqc, cqc, c_eqn, eqn);
	MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
	if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
		MLX5_SET(cqc, cqc, oi, 1);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen, out, sizeof(out));
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	cq->mcq.irqn = irqn;
	cq->mcq.comp = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	INIT_LIST_HEAD(&cq->wc_list);

	if (udata)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}

	kvfree(cqb);
	return 0;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (udata)
		destroy_cq_user(cq, udata);
	else
		destroy_cq_kernel(dev, cq);
	return err;
}

void mlx5_ib_destroy_cq(struct ib_cq *cq, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);

	mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (udata)
		destroy_cq_user(mcq, udata);
	else
		destroy_cq_kernel(dev, mcq);
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}

void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from. It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
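			/* Keep the destination slot's ownership bit while
			 * copying the older CQE over it.
			 */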
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}

void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}

int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;

	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
		return -ENOSYS;

	err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq,
					     cq_period, cq_count);
	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}

static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;
	struct ib_ucontext *context = cq->buf.umem->context;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	/* check multiplication overflow */
	if (ucmd.cqe_size && SIZE_MAX / ucmd.cqe_size <= entries - 1)
		return -EINVAL;

	umem = ib_umem_get(context, ucmd.buf_addr,
			   (size_t)ucmd.cqe_size * entries,
			   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}

static void un_resize_user(struct mlx5_ib_cq *cq)
{
	ib_umem_release(cq->resize_umem);
}

static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_buf(cq, cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}

static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, cq->resize_buf);
	cq->resize_buf = NULL;
}

static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
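	/* Copy software-owned CQEs from the old buffer into the resize
	 * buffer until the RESIZE_CQ CQE written by the device is found.
	 */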
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_cqe_from_buf(cq->resize_buf,
					(i + 1) & (cq->resize_buf->nent),
					dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}

int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	void *cqc;
	u32 *in;
	int err;
	int npas;
	__be64 *pas;
	int page_shift;
	int inlen;
	int uninitialized_var(cqe_size);
	unsigned long flags;

	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1 ||
	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
			     entries,
			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
		return -EINVAL;
	}

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
				  &cqe_size);
	} else {
		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (!err) {
			npas = cq->resize_buf->buf.npages;
			page_shift = cq->resize_buf->buf.page_shift;
		}
	}

	if (err)
		goto ex;

	inlen = MLX5_ST_SZ_BYTES(modify_cq_in) +
		MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas;

	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas);
	if (udata)
		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
				     pas, 0);
	else
		mlx5_fill_page_array(&cq->resize_buf->buf, pas);

	MLX5_SET(modify_cq_in, in,
		 modify_field_select_resize_field_select.resize_field_select.resize_field_select,
		 MLX5_MODIFY_CQ_MASK_LOG_SIZE |
		 MLX5_MODIFY_CQ_MASK_PG_OFFSET |
		 MLX5_MODIFY_CQ_MASK_PG_SIZE);

	cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context);

	MLX5_SET(cqc, cqc, log_page_size,
		 page_shift - MLX5_ADAPTER_PAGE_SHIFT);
	MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size));
	MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries));

	MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE);
	MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
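		/* Kernel CQ: drain outstanding CQEs into the resize buffer
		 * under the CQ lock, swap the buffers, and free the old
		 * buffer once the lock is dropped.
		 */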
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}
	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	if (udata)
		un_resize_user(cq);
	else
		un_resize_kernel(dev, cq);
ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}

int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}

/* Called from atomic context */
int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
{
	struct mlx5_ib_wc *soft_wc;
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	unsigned long flags;

	soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
	if (!soft_wc)
		return -ENOMEM;

	soft_wc->wc = *wc;
	spin_lock_irqsave(&cq->lock, flags);
	list_add_tail(&soft_wc->list, &cq->wc_list);
	if (cq->notify_flags == IB_CQ_NEXT_COMP ||
	    wc->status != IB_WC_SUCCESS) {
		cq->notify_flags = 0;
		schedule_work(&cq->notify_work);
	}
	spin_unlock_irqrestore(&cq->lock, flags);

	return 0;
}