/*
 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>

#include <infiniband/opcode.h>

#include "mlx5.h"
#include "wqe.h"
#include "doorbell.h"

enum {
	CQ_OK = 0,
	CQ_EMPTY = -1,
	CQ_POLL_ERR = -2
};

enum {
	MLX5_CQ_MODIFY_RESEIZE = 0,
	MLX5_CQ_MODIFY_MODER = 1,
	MLX5_CQ_MODIFY_MAPPING = 2,
};

int mlx5_stall_num_loop = 60;
int mlx5_stall_cq_poll_min = 60;
int mlx5_stall_cq_poll_max = 100000;
int mlx5_stall_cq_inc_step = 100;
int mlx5_stall_cq_dec_step = 10;

static inline uint8_t get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
{
	return (cqe->l4_hdr_type_etc >> 2) & 0x3;
}

static void *get_buf_cqe(struct mlx5_buf *buf, int n, int cqe_sz)
{
	return buf->buf + n * cqe_sz;
}

static void *get_cqe(struct mlx5_cq *cq, int n)
{
	return cq->active_buf->buf + n * cq->cqe_sz;
}

static void *get_sw_cqe(struct mlx5_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibv_cq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;

	if (likely(mlx5dv_get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibv_cq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}

static void *next_cqe_sw(struct mlx5_cq *cq)
{
	return get_sw_cqe(cq, cq->cons_index);
}

static void update_cons_index(struct mlx5_cq *cq)
{
	cq->dbrec[MLX5_CQ_SET_CI] = htobe32(cq->cons_index & 0xffffff);
}

static inline void handle_good_req(struct ibv_wc *wc, struct mlx5_cqe64 *cqe, struct mlx5_wq *wq, int idx)
{
	switch (be32toh(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IBV_WC_WITH_IMM;
		SWITCH_FALLTHROUGH;
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode = IBV_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IBV_WC_WITH_IMM;
		SWITCH_FALLTHROUGH;
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode = IBV_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode = IBV_WC_RDMA_READ;
		wc->byte_len = be32toh(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode = IBV_WC_COMP_SWAP;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode = IBV_WC_FETCH_ADD;
		wc->byte_len = 8;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = wq->wr_data[idx];
		break;
	case MLX5_OPCODE_TSO:
		wc->opcode = IBV_WC_TSO;
		break;
	}
}

static inline int handle_responder_lazy(struct mlx5_cq *cq, struct mlx5_cqe64 *cqe,
					struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
{
	uint16_t wqe_ctr;
	struct mlx5_wq *wq;
	struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
	int err = IBV_WC_SUCCESS;

	if (srq) {
		wqe_ctr = be16toh(cqe->wqe_counter);
		cq->ibv_cq.wr_id = srq->wrid[wqe_ctr];
		mlx5_free_srq_wqe(srq, wqe_ctr);
		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
						    be32toh(cqe->byte_cnt));
		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
						    be32toh(cqe->byte_cnt));
	} else {
		if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
			wq = &qp->rq;
			if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
				cq->flags |= MLX5_CQ_FLAGS_RX_CSUM_VALID;
		} else {
			wq = &(rsc_to_mrwq(cur_rsc)->rq);
		}

		wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
		cq->ibv_cq.wr_id = wq->wrid[wqe_ctr];
		++wq->tail;
		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
						    be32toh(cqe->byte_cnt));
		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
						    be32toh(cqe->byte_cnt));
	}

	return err;
}

static inline int handle_responder(struct ibv_wc *wc, struct mlx5_cqe64 *cqe,
				   struct mlx5_resource *cur_rsc, struct mlx5_srq *srq)
{
	uint16_t wqe_ctr;
	struct mlx5_wq *wq;
	struct mlx5_qp *qp = rsc_to_mqp(cur_rsc);
	uint8_t g;
	int err = 0;

	wc->byte_len = be32toh(cqe->byte_cnt);
	if (srq) {
		wqe_ctr = be16toh(cqe->wqe_counter);
		wc->wr_id = srq->wrid[wqe_ctr];
		mlx5_free_srq_wqe(srq, wqe_ctr);
		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe,
						    wc->byte_len);
		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
			err = mlx5_copy_to_recv_srq(srq, wqe_ctr, cqe - 1,
						    wc->byte_len);
	} else {
		if (likely(cur_rsc->type == MLX5_RSC_TYPE_QP)) {
			wq = &qp->rq;
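			/* Report IBV_WC_IP_CSUM_OK only when the HW validated both
			 * the L3 and L4 checksums and the packet is IPv4.
			 */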
			if (qp->qp_cap_cache & MLX5_RX_CSUM_VALID)
				wc->wc_flags |= (!!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) &
						 !!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) &
						 (get_cqe_l3_hdr_type(cqe) ==
						  MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
						IBV_WC_IP_CSUM_OK_SHIFT;
		} else {
			wq = &(rsc_to_mrwq(cur_rsc)->rq);
		}

		wqe_ctr = wq->tail & (wq->wqe_cnt - 1);
		wc->wr_id = wq->wrid[wqe_ctr];
		++wq->tail;
		if (cqe->op_own & MLX5_INLINE_SCATTER_32)
			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe,
						    wc->byte_len);
		else if (cqe->op_own & MLX5_INLINE_SCATTER_64)
			err = mlx5_copy_to_recv_wqe(qp, wqe_ctr, cqe - 1,
						    wc->byte_len);
	}
	if (err)
		return err;

	switch (cqe->op_own >> 4) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags |= IBV_WC_WITH_IMM;
		wc->imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode = IBV_WC_RECV;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode = IBV_WC_RECV;
		wc->wc_flags |= IBV_WC_WITH_IMM;
		wc->imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode = IBV_WC_RECV;
		wc->wc_flags |= IBV_WC_WITH_INV;
		wc->imm_data = be32toh(cqe->imm_inval_pkey);
		break;
	}
	wc->slid = be16toh(cqe->slid);
	wc->sl = (be32toh(cqe->flags_rqpn) >> 24) & 0xf;
	wc->src_qp = be32toh(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path & 0x7f;
	g = (be32toh(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IBV_WC_GRH : 0;
	wc->pkey_index = be32toh(cqe->imm_inval_pkey) & 0xffff;

	return IBV_WC_SUCCESS;
}

static void dump_cqe(FILE *fp, void *buf)
{
	uint32_t *p = buf;
	int i;

	for (i = 0; i < 16; i += 4)
		fprintf(fp, "%08x %08x %08x %08x\n", be32toh(p[i]), be32toh(p[i + 1]),
			be32toh(p[i + 2]), be32toh(p[i + 3]));
}

static enum ibv_wc_status mlx5_handle_error_cqe(struct mlx5_err_cqe *cqe)
{
	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		return IBV_WC_LOC_LEN_ERR;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		return IBV_WC_LOC_QP_OP_ERR;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		return IBV_WC_LOC_PROT_ERR;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		return IBV_WC_WR_FLUSH_ERR;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		return IBV_WC_MW_BIND_ERR;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		return IBV_WC_BAD_RESP_ERR;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		return IBV_WC_LOC_ACCESS_ERR;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		return IBV_WC_REM_INV_REQ_ERR;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		return IBV_WC_REM_ACCESS_ERR;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		return IBV_WC_REM_OP_ERR;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		return IBV_WC_RETRY_EXC_ERR;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		return IBV_WC_RNR_RETRY_EXC_ERR;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		return IBV_WC_REM_ABORT_ERR;
	default:
		return IBV_WC_GENERAL_ERR;
	}
}

#if defined(__x86_64__) || defined (__i386__)
static inline unsigned long get_cycles(void)
{
	uint32_t low, high;
	uint64_t val;
	asm volatile ("rdtsc" : "=a" (low), "=d" (high));
	val = high;
	val = (val << 32) | low;
	return val;
}

static void mlx5_stall_poll_cq(void)
{
	int i;

	for (i = 0; i < mlx5_stall_num_loop; i++)
		(void)get_cycles();
}
static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
{
	while (get_cycles() < cycles)
		; /* Nothing */
}
static void mlx5_get_cycles(uint64_t *cycles)
{
	*cycles = get_cycles();
}
#else
static void mlx5_stall_poll_cq(void)
{
}
static void mlx5_stall_cycles_poll_cq(uint64_t cycles)
{
}
static void mlx5_get_cycles(uint64_t *cycles)
{
}
#endif

static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
					      struct mlx5_resource **cur_rsc,
					      uint32_t rsn, int cqe_ver)
					      ALWAYS_INLINE;
static inline struct mlx5_qp *get_req_context(struct mlx5_context *mctx,
					      struct mlx5_resource **cur_rsc,
					      uint32_t rsn, int cqe_ver)
{
	if (!*cur_rsc || (rsn != (*cur_rsc)->rsn))
		*cur_rsc = cqe_ver ? mlx5_find_uidx(mctx, rsn) :
			(struct mlx5_resource *)mlx5_find_qp(mctx, rsn);

	return rsc_to_mqp(*cur_rsc);
}

static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
				  struct mlx5_resource **cur_rsc,
				  struct mlx5_srq **cur_srq,
				  uint32_t uidx, uint8_t *is_srq)
				  ALWAYS_INLINE;
static inline int get_resp_ctx_v1(struct mlx5_context *mctx,
				  struct mlx5_resource **cur_rsc,
				  struct mlx5_srq **cur_srq,
				  uint32_t uidx, uint8_t *is_srq)
{
	struct mlx5_qp *mqp;

	if (!*cur_rsc || (uidx != (*cur_rsc)->rsn)) {
		*cur_rsc = mlx5_find_uidx(mctx, uidx);
		if (unlikely(!*cur_rsc))
			return CQ_POLL_ERR;
	}

	switch ((*cur_rsc)->type) {
	case MLX5_RSC_TYPE_QP:
		mqp = rsc_to_mqp(*cur_rsc);
		if (mqp->verbs_qp.qp.srq) {
			*cur_srq = to_msrq(mqp->verbs_qp.qp.srq);
			*is_srq = 1;
		}
		break;
	case MLX5_RSC_TYPE_XSRQ:
		*cur_srq = rsc_to_msrq(*cur_rsc);
		*is_srq = 1;
		break;
	case MLX5_RSC_TYPE_RWQ:
		break;
	default:
		return CQ_POLL_ERR;
	}

	return CQ_OK;
}

static inline int get_qp_ctx(struct mlx5_context *mctx,
			     struct mlx5_resource **cur_rsc,
			     uint32_t qpn)
			     ALWAYS_INLINE;
static inline int get_qp_ctx(struct mlx5_context *mctx,
			     struct mlx5_resource **cur_rsc,
			     uint32_t qpn)
{
	if (!*cur_rsc || (qpn != (*cur_rsc)->rsn)) {
		/*
		 * We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		*cur_rsc = (struct mlx5_resource *)mlx5_find_qp(mctx, qpn);
		if (unlikely(!*cur_rsc))
			return CQ_POLL_ERR;
	}

	return CQ_OK;
}

static inline int get_srq_ctx(struct mlx5_context *mctx,
			      struct mlx5_srq **cur_srq,
			      uint32_t srqn_uidx)
			      ALWAYS_INLINE;
static inline int get_srq_ctx(struct mlx5_context *mctx,
			      struct mlx5_srq **cur_srq,
			      uint32_t srqn)
{
	if (!*cur_srq || (srqn != (*cur_srq)->srqn)) {
		*cur_srq = mlx5_find_srq(mctx, srqn);
		if (unlikely(!*cur_srq))
			return CQ_POLL_ERR;
	}

	return CQ_OK;
}

static inline int get_cur_rsc(struct mlx5_context *mctx,
			      int cqe_ver,
			      uint32_t qpn,
			      uint32_t srqn_uidx,
			      struct mlx5_resource **cur_rsc,
			      struct mlx5_srq **cur_srq,
			      uint8_t *is_srq)
{
	int err;

	if (cqe_ver) {
		err = get_resp_ctx_v1(mctx, cur_rsc, cur_srq, srqn_uidx,
				      is_srq);
	} else {
		if (srqn_uidx) {
			*is_srq = 1;
			err = get_srq_ctx(mctx, cur_srq, srqn_uidx);
		} else {
			err = get_qp_ctx(mctx, cur_rsc, qpn);
		}
	}

	return err;

}

static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
				    struct mlx5_cqe64 **pcqe64,
				    void **pcqe)
				    ALWAYS_INLINE;
static inline int mlx5_get_next_cqe(struct mlx5_cq *cq,
				    struct mlx5_cqe64 **pcqe64,
				    void **pcqe)
{
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	cqe = next_cqe_sw(cq);
	if (!cqe)
		return CQ_EMPTY;

	cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;

	++cq->cons_index;

	VALGRIND_MAKE_MEM_DEFINED(cqe64, sizeof *cqe64);

	/*
	 * Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	udma_from_device_barrier();

#ifdef MLX5_DEBUG
	{
		struct mlx5_context *mctx = to_mctx(cq->ibv_cq.context);

		if (mlx5_debug_mask & MLX5_DBG_CQ_CQE) {
			FILE *fp = mctx->dbg_fp;

			mlx5_dbg(fp, MLX5_DBG_CQ_CQE, "dump cqe for cqn 0x%x:\n", cq->cqn);
			dump_cqe(fp, cqe64);
		}
	}
#endif
	*pcqe64 = cqe64;
	*pcqe = cqe;

	return CQ_OK;
}

static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
				 struct mlx5_cqe64 *cqe64,
				 void *cqe,
				 struct mlx5_resource **cur_rsc,
				 struct mlx5_srq **cur_srq,
				 struct ibv_wc *wc,
				 int cqe_ver, int lazy)
				 ALWAYS_INLINE;
static inline int mlx5_parse_cqe(struct mlx5_cq *cq,
				 struct mlx5_cqe64 *cqe64,
				 void *cqe,
				 struct mlx5_resource **cur_rsc,
				 struct mlx5_srq **cur_srq,
				 struct ibv_wc *wc,
				 int cqe_ver, int lazy)
{
	struct mlx5_wq *wq;
	uint16_t wqe_ctr;
	uint32_t qpn;
	uint32_t srqn_uidx;
	int idx;
	uint8_t opcode;
	struct mlx5_err_cqe *ecqe;
	int err = 0;
	struct mlx5_qp *mqp;
	struct mlx5_context *mctx;
	uint8_t is_srq = 0;

	mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
	qpn = be32toh(cqe64->sop_drop_qpn) & 0xffffff;
	if (lazy) {
		cq->cqe64 = cqe64;
		cq->flags &= (~MLX5_CQ_FLAGS_RX_CSUM_VALID);
	} else {
		wc->wc_flags = 0;
		wc->qp_num = qpn;
	}

	opcode = mlx5dv_get_cqe_opcode(cqe64);
	switch (opcode) {
	case MLX5_CQE_REQ:
	{
		mqp = get_req_context(mctx, cur_rsc,
				      (cqe_ver ? (be32toh(cqe64->srqn_uidx) & 0xffffff) : qpn),
				      cqe_ver);
		if (unlikely(!mqp))
			return CQ_POLL_ERR;
		wq = &mqp->sq;
		wqe_ctr = be16toh(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		if (lazy) {
			uint32_t wc_byte_len;

			switch (be32toh(cqe64->sop_drop_qpn) >> 24) {
			case MLX5_OPCODE_UMR:
				cq->umr_opcode = wq->wr_data[idx];
				break;

			case MLX5_OPCODE_RDMA_READ:
				wc_byte_len = be32toh(cqe64->byte_cnt);
				goto scatter_out;
			case MLX5_OPCODE_ATOMIC_CS:
			case MLX5_OPCODE_ATOMIC_FA:
				wc_byte_len = 8;

			scatter_out:
				if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
					err = mlx5_copy_to_send_wqe(
					    mqp, wqe_ctr, cqe, wc_byte_len);
				else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
					err = mlx5_copy_to_send_wqe(
					    mqp, wqe_ctr, cqe - 1, wc_byte_len);
				break;
			}

			cq->ibv_cq.wr_id = wq->wrid[idx];
			cq->ibv_cq.status = err;
		} else {
			handle_good_req(wc, cqe64, wq, idx);

			if (cqe64->op_own & MLX5_INLINE_SCATTER_32)
				err = mlx5_copy_to_send_wqe(mqp, wqe_ctr, cqe,
							    wc->byte_len);
			else if (cqe64->op_own & MLX5_INLINE_SCATTER_64)
				err = mlx5_copy_to_send_wqe(
				    mqp, wqe_ctr, cqe - 1, wc->byte_len);

			wc->wr_id = wq->wrid[idx];
			wc->status = err;
		}

		wq->tail = wq->wqe_head[idx] + 1;
		break;
	}
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
		err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx, cur_rsc,
				  cur_srq, &is_srq);
		if (unlikely(err))
			return CQ_POLL_ERR;

		if (lazy)
			cq->ibv_cq.status = handle_responder_lazy(cq, cqe64,
							*cur_rsc,
							is_srq ? *cur_srq : NULL);
		else
			wc->status = handle_responder(wc, cqe64, *cur_rsc,
						      is_srq ? *cur_srq : NULL);
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		srqn_uidx = be32toh(cqe64->srqn_uidx) & 0xffffff;
		ecqe = (struct mlx5_err_cqe *)cqe64;
		{
			enum ibv_wc_status *pstatus = lazy ? &cq->ibv_cq.status : &wc->status;

			*pstatus = mlx5_handle_error_cqe(ecqe);
		}

		if (!lazy)
			wc->vendor_err = ecqe->vendor_err_synd;

		if (unlikely(ecqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR &&
			     ecqe->syndrome != MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR)) {
			FILE *fp = mctx->dbg_fp;
			fprintf(fp, PFX "%s: got completion with error:\n",
				mctx->hostname);
			dump_cqe(fp, ecqe);
			if (mlx5_freeze_on_error_cqe) {
				fprintf(fp, PFX "freezing at poll cq...");
				while (1)
					sleep(10);
			}
		}

		if (opcode == MLX5_CQE_REQ_ERR) {
			mqp = get_req_context(mctx, cur_rsc,
					      (cqe_ver ? srqn_uidx : qpn), cqe_ver);
			if (unlikely(!mqp))
				return CQ_POLL_ERR;
			wq = &mqp->sq;
			wqe_ctr = be16toh(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			if (lazy)
				cq->ibv_cq.wr_id = wq->wrid[idx];
			else
				wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			err = get_cur_rsc(mctx, cqe_ver, qpn, srqn_uidx,
					  cur_rsc, cur_srq, &is_srq);
			if (unlikely(err))
				return CQ_POLL_ERR;

			if (is_srq) {
				wqe_ctr = be16toh(cqe64->wqe_counter);
				if (lazy)
					cq->ibv_cq.wr_id = (*cur_srq)->wrid[wqe_ctr];
				else
					wc->wr_id = (*cur_srq)->wrid[wqe_ctr];
				mlx5_free_srq_wqe(*cur_srq, wqe_ctr);
			} else {
				switch ((*cur_rsc)->type) {
				case MLX5_RSC_TYPE_RWQ:
					wq = &(rsc_to_mrwq(*cur_rsc)->rq);
					break;
				default:
					wq = &(rsc_to_mqp(*cur_rsc)->rq);
					break;
				}

				if (lazy)
					cq->ibv_cq.wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				else
					wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	}

	return CQ_OK;
}

static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
				      struct mlx5_cqe64 *cqe64,
				      void *cqe, int cqe_ver)
				      ALWAYS_INLINE;
static inline int mlx5_parse_lazy_cqe(struct mlx5_cq *cq,
				      struct mlx5_cqe64 *cqe64,
				      void *cqe, int cqe_ver)
{
	return mlx5_parse_cqe(cq, cqe64, cqe, &cq->cur_rsc, &cq->cur_srq, NULL, cqe_ver, 1);
}

static inline int mlx5_poll_one(struct mlx5_cq *cq,
				struct mlx5_resource **cur_rsc,
				struct mlx5_srq **cur_srq,
				struct ibv_wc *wc, int cqe_ver)
				ALWAYS_INLINE;
static inline int mlx5_poll_one(struct mlx5_cq *cq,
				struct mlx5_resource **cur_rsc,
				struct mlx5_srq **cur_srq,
				struct ibv_wc *wc, int cqe_ver)
{
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int err;

	err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
	if (err == CQ_EMPTY)
		return err;

	return mlx5_parse_cqe(cq, cqe64, cqe, cur_rsc, cur_srq, wc, cqe_ver, 0);
}

static inline int poll_cq(struct ibv_cq *ibcq, int ne,
			  struct ibv_wc *wc, int cqe_ver)
			  ALWAYS_INLINE;
static inline int poll_cq(struct ibv_cq *ibcq, int ne,
			  struct ibv_wc *wc, int cqe_ver)
{
	struct mlx5_cq *cq = to_mcq(ibcq);
	struct mlx5_resource *rsc = NULL;
	struct mlx5_srq *srq = NULL;
	int npolled;
	int err = CQ_OK;

	if (cq->stall_enable) {
		if (cq->stall_adaptive_enable) {
			if (cq->stall_last_count)
				mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
		} else if (cq->stall_next_poll) {
			cq->stall_next_poll = 0;
			mlx5_stall_poll_cq();
		}
	}

	mlx5_spin_lock(&cq->lock);

	for (npolled = 0; npolled < ne; ++npolled) {
		err = mlx5_poll_one(cq, &rsc, &srq, wc + npolled, cqe_ver);
		if (err != CQ_OK)
			break;
	}

	update_cons_index(cq);

	mlx5_spin_unlock(&cq->lock);

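	/*
	 * Tune stalling for the next call: in adaptive mode the busy-wait
	 * window grows or shrinks with how many CQEs this call returned;
	 * otherwise a fixed stall is armed when the CQ was empty.
	 */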
	if (cq->stall_enable) {
		if (cq->stall_adaptive_enable) {
			if (npolled == 0) {
				cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step,
						       mlx5_stall_cq_poll_min);
				mlx5_get_cycles(&cq->stall_last_count);
			} else if (npolled < ne) {
				cq->stall_cycles = min(cq->stall_cycles+mlx5_stall_cq_inc_step,
						       mlx5_stall_cq_poll_max);
				mlx5_get_cycles(&cq->stall_last_count);
			} else {
				cq->stall_cycles = max(cq->stall_cycles-mlx5_stall_cq_dec_step,
						       mlx5_stall_cq_poll_min);
				cq->stall_last_count = 0;
			}
		} else if (err == CQ_EMPTY) {
			cq->stall_next_poll = 1;
		}
	}

	return err == CQ_POLL_ERR ? err : npolled;
}

enum polling_mode {
	POLLING_MODE_NO_STALL,
	POLLING_MODE_STALL,
	POLLING_MODE_STALL_ADAPTIVE
};

static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
				  int lock, enum polling_mode stall)
				  ALWAYS_INLINE;
static inline void _mlx5_end_poll(struct ibv_cq_ex *ibcq,
				  int lock, enum polling_mode stall)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	update_cons_index(cq);

	if (lock)
		mlx5_spin_unlock(&cq->lock);

	if (stall) {
		if (stall == POLLING_MODE_STALL_ADAPTIVE) {
			if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
				cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
						       mlx5_stall_cq_poll_min);
				mlx5_get_cycles(&cq->stall_last_count);
			} else if (cq->flags & MLX5_CQ_FLAGS_EMPTY_DURING_POLL) {
				cq->stall_cycles = min(cq->stall_cycles + mlx5_stall_cq_inc_step,
						       mlx5_stall_cq_poll_max);
				mlx5_get_cycles(&cq->stall_last_count);
			} else {
				cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
						       mlx5_stall_cq_poll_min);
				cq->stall_last_count = 0;
			}
		} else if (!(cq->flags & MLX5_CQ_FLAGS_FOUND_CQES)) {
			cq->stall_next_poll = 1;
		}

		cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES | MLX5_CQ_FLAGS_EMPTY_DURING_POLL);
	}
}

static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
				  int lock, enum polling_mode stall, int cqe_version)
				  ALWAYS_INLINE;
static inline int mlx5_start_poll(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr,
				  int lock, enum polling_mode stall, int cqe_version)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int err;

	if (unlikely(attr->comp_mask))
		return EINVAL;

	if (stall) {
		if (stall == POLLING_MODE_STALL_ADAPTIVE) {
			if (cq->stall_last_count)
				mlx5_stall_cycles_poll_cq(cq->stall_last_count + cq->stall_cycles);
		} else if (cq->stall_next_poll) {
			cq->stall_next_poll = 0;
			mlx5_stall_poll_cq();
		}
	}

	if (lock)
		mlx5_spin_lock(&cq->lock);

	cq->cur_rsc = NULL;
	cq->cur_srq = NULL;

	err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
	if (err == CQ_EMPTY) {
		if (lock)
			mlx5_spin_unlock(&cq->lock);

		if (stall) {
			if (stall == POLLING_MODE_STALL_ADAPTIVE) {
				cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
						       mlx5_stall_cq_poll_min);
				mlx5_get_cycles(&cq->stall_last_count);
			} else {
				cq->stall_next_poll = 1;
			}
		}

		return ENOENT;
	}

	if (stall)
		cq->flags |= MLX5_CQ_FLAGS_FOUND_CQES;

	err = mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
	if (lock && err)
		mlx5_spin_unlock(&cq->lock);

	if (stall && err) {
		if (stall == POLLING_MODE_STALL_ADAPTIVE) {
			cq->stall_cycles = max(cq->stall_cycles - mlx5_stall_cq_dec_step,
					       mlx5_stall_cq_poll_min);
			cq->stall_last_count = 0;
		}

		cq->flags &= ~(MLX5_CQ_FLAGS_FOUND_CQES);
	}

	return err;
}

static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
				 enum polling_mode stall, int cqe_version)
				 ALWAYS_INLINE;
static inline int mlx5_next_poll(struct ibv_cq_ex *ibcq,
				 enum polling_mode stall,
				 int cqe_version)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx5_cqe64 *cqe64;
	void *cqe;
	int err;

	err = mlx5_get_next_cqe(cq, &cqe64, &cqe);
	if (err == CQ_EMPTY) {
		if (stall == POLLING_MODE_STALL_ADAPTIVE)
			cq->flags |= MLX5_CQ_FLAGS_EMPTY_DURING_POLL;

		return ENOENT;
	}

	return mlx5_parse_lazy_cqe(cq, cqe64, cqe, cqe_version);
}

static inline int mlx5_next_poll_adaptive_v0(struct ibv_cq_ex *ibcq)
{
	return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 0);
}

static inline int mlx5_next_poll_adaptive_v1(struct ibv_cq_ex *ibcq)
{
	return mlx5_next_poll(ibcq, POLLING_MODE_STALL_ADAPTIVE, 1);
}

static inline int mlx5_next_poll_v0(struct ibv_cq_ex *ibcq)
{
	return mlx5_next_poll(ibcq, 0, 0);
}

static inline int mlx5_next_poll_v1(struct ibv_cq_ex *ibcq)
{
	return mlx5_next_poll(ibcq, 0, 1);
}

static inline int mlx5_start_poll_v0(struct ibv_cq_ex *ibcq,
				     struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 0, 0, 0);
}

static inline int mlx5_start_poll_v1(struct ibv_cq_ex *ibcq,
				     struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 0, 0, 1);
}

static inline int mlx5_start_poll_v0_lock(struct ibv_cq_ex *ibcq,
					  struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 1, 0, 0);
}

static inline int mlx5_start_poll_v1_lock(struct ibv_cq_ex *ibcq,
					  struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 1, 0, 1);
}

static inline int mlx5_start_poll_adaptive_stall_v0_lock(struct ibv_cq_ex *ibcq,
							 struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 0);
}

static inline int mlx5_start_poll_stall_v0_lock(struct ibv_cq_ex *ibcq,
						struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 0);
}

static inline int mlx5_start_poll_adaptive_stall_v1_lock(struct ibv_cq_ex *ibcq,
							 struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL_ADAPTIVE, 1);
}

static inline int mlx5_start_poll_stall_v1_lock(struct ibv_cq_ex *ibcq,
						struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 1, POLLING_MODE_STALL, 1);
}

static inline int mlx5_start_poll_stall_v0(struct ibv_cq_ex *ibcq,
					   struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 0);
}

static inline int mlx5_start_poll_adaptive_stall_v0(struct ibv_cq_ex *ibcq,
						    struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 0);
}

static inline int mlx5_start_poll_adaptive_stall_v1(struct ibv_cq_ex *ibcq,
						    struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL_ADAPTIVE, 1);
}

static inline int mlx5_start_poll_stall_v1(struct ibv_cq_ex *ibcq,
					   struct ibv_poll_cq_attr *attr)
{
	return mlx5_start_poll(ibcq, attr, 0, POLLING_MODE_STALL, 1);
}

static inline void mlx5_end_poll_adaptive_stall_lock(struct ibv_cq_ex *ibcq)
{
	_mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL_ADAPTIVE);
}

static inline void mlx5_end_poll_stall_lock(struct ibv_cq_ex *ibcq)
{
	_mlx5_end_poll(ibcq, 1, POLLING_MODE_STALL);
}

static inline void mlx5_end_poll_adaptive_stall(struct ibv_cq_ex *ibcq)
{
	_mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL_ADAPTIVE);
}

static inline void mlx5_end_poll_stall(struct ibv_cq_ex *ibcq)
{
	_mlx5_end_poll(ibcq, 0, POLLING_MODE_STALL);
}

static inline void mlx5_end_poll(struct ibv_cq_ex *ibcq)
{
	_mlx5_end_poll(ibcq, 0, 0);
}

static inline void mlx5_end_poll_lock(struct ibv_cq_ex *ibcq)
{
	_mlx5_end_poll(ibcq, 1, 0);
}

int mlx5_poll_cq(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
	return poll_cq(ibcq, ne, wc, 0);
}

int mlx5_poll_cq_v1(struct ibv_cq *ibcq, int ne, struct ibv_wc *wc)
{
	return poll_cq(ibcq, ne, wc, 1);
}

static inline enum ibv_wc_opcode mlx5_cq_read_wc_opcode(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
	case MLX5_CQE_RESP_WR_IMM:
		return IBV_WC_RECV_RDMA_WITH_IMM;
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		return IBV_WC_RECV;
	case MLX5_CQE_REQ:
		switch (be32toh(cq->cqe64->sop_drop_qpn) >> 24) {
		case MLX5_OPCODE_RDMA_WRITE_IMM:
		case MLX5_OPCODE_RDMA_WRITE:
			return IBV_WC_RDMA_WRITE;
		case MLX5_OPCODE_SEND_IMM:
		case MLX5_OPCODE_SEND:
		case MLX5_OPCODE_SEND_INVAL:
			return IBV_WC_SEND;
		case MLX5_OPCODE_RDMA_READ:
			return IBV_WC_RDMA_READ;
		case MLX5_OPCODE_ATOMIC_CS:
			return IBV_WC_COMP_SWAP;
		case MLX5_OPCODE_ATOMIC_FA:
			return IBV_WC_FETCH_ADD;
		case MLX5_OPCODE_UMR:
			return cq->umr_opcode;
		case MLX5_OPCODE_TSO:
			return IBV_WC_TSO;
		}
	}

#ifdef MLX5_DEBUG
	{
		struct mlx5_context *ctx = to_mctx(ibcq->context);

		mlx5_dbg(ctx->dbg_fp, MLX5_DBG_CQ_CQE, "un-expected opcode in cqe\n");
	}
#endif
	return 0;
}

static inline uint32_t mlx5_cq_read_wc_qp_num(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe64->sop_drop_qpn) & 0xffffff;
}

static inline int mlx5_cq_read_wc_flags(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	int wc_flags = 0;

	if (cq->flags & MLX5_CQ_FLAGS_RX_CSUM_VALID)
		wc_flags = (!!(cq->cqe64->hds_ip_ext & MLX5_CQE_L4_OK) &
			    !!(cq->cqe64->hds_ip_ext & MLX5_CQE_L3_OK) &
			    (get_cqe_l3_hdr_type(cq->cqe64) ==
			     MLX5_CQE_L3_HDR_TYPE_IPV4)) <<
			   IBV_WC_IP_CSUM_OK_SHIFT;

	switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND_IMM:
		wc_flags |= IBV_WC_WITH_IMM;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc_flags |= IBV_WC_WITH_INV;
		break;
	}

	wc_flags |= ((be32toh(cq->cqe64->flags_rqpn) >> 28) & 3) ?
		IBV_WC_GRH : 0;
	return wc_flags;
}

static inline uint32_t mlx5_cq_read_wc_byte_len(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe64->byte_cnt);
}

static inline uint32_t mlx5_cq_read_wc_vendor_err(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));
	struct mlx5_err_cqe *ecqe = (struct mlx5_err_cqe *)cq->cqe64;

	return ecqe->vendor_err_synd;
}

static inline uint32_t mlx5_cq_read_wc_imm_data(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	switch (mlx5dv_get_cqe_opcode(cq->cqe64)) {
	case MLX5_CQE_RESP_SEND_INV:
		return be32toh(cq->cqe64->imm_inval_pkey);
	default:
		return cq->cqe64->imm_inval_pkey;
	}
}

static inline uint32_t mlx5_cq_read_wc_slid(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return (uint32_t)be16toh(cq->cqe64->slid);
}

static inline uint8_t mlx5_cq_read_wc_sl(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return (be32toh(cq->cqe64->flags_rqpn) >> 24) & 0xf;
}

static inline uint32_t mlx5_cq_read_wc_src_qp(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe64->flags_rqpn) & 0xffffff;
}

static inline uint8_t mlx5_cq_read_wc_dlid_path_bits(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return cq->cqe64->ml_path & 0x7f;
}

static inline uint64_t mlx5_cq_read_wc_completion_ts(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be64toh(cq->cqe64->timestamp);
}

static inline uint16_t mlx5_cq_read_wc_cvlan(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be16toh(cq->cqe64->vlan_info);
}

static inline uint32_t mlx5_cq_read_flow_tag(struct ibv_cq_ex *ibcq)
{
	struct mlx5_cq *cq = to_mcq(ibv_cq_ex_to_cq(ibcq));

	return be32toh(cq->cqe64->sop_drop_qpn) & MLX5_FLOW_TAG_MASK;
}

#define BIT(i) (1UL << (i))

#define SINGLE_THREADED BIT(0)
#define STALL BIT(1)
#define V1 BIT(2)
#define ADAPTIVE BIT(3)

#define mlx5_start_poll_name(cqe_ver, lock, stall, adaptive) \
	mlx5_start_poll##adaptive##stall##cqe_ver##lock
#define mlx5_next_poll_name(cqe_ver, adaptive) \
	mlx5_next_poll##adaptive##cqe_ver
#define mlx5_end_poll_name(lock, stall, adaptive) \
	mlx5_end_poll##adaptive##stall##lock

#define POLL_FN_ENTRY(cqe_ver, lock, stall, adaptive) { \
	.start_poll = &mlx5_start_poll_name(cqe_ver, lock, stall, adaptive), \
	.next_poll = &mlx5_next_poll_name(cqe_ver, adaptive), \
	.end_poll = &mlx5_end_poll_name(lock, stall, adaptive), \
	}

static const struct op
{
	int (*start_poll)(struct ibv_cq_ex *ibcq, struct ibv_poll_cq_attr *attr);
	int (*next_poll)(struct ibv_cq_ex *ibcq);
	void (*end_poll)(struct ibv_cq_ex *ibcq);
} ops[ADAPTIVE + V1 + STALL + SINGLE_THREADED + 1] = {
	[V1] = POLL_FN_ENTRY(_v1, _lock, , ),
	[0] = POLL_FN_ENTRY(_v0, _lock, , ),
	[V1 | SINGLE_THREADED] = POLL_FN_ENTRY(_v1, , , ),
	[SINGLE_THREADED] = POLL_FN_ENTRY(_v0, , , ),
	[V1 | STALL] = POLL_FN_ENTRY(_v1, _lock, _stall, ),
	[STALL] = POLL_FN_ENTRY(_v0, _lock, _stall, ),
	[V1 | SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v1, , _stall, ),
	[SINGLE_THREADED | STALL] = POLL_FN_ENTRY(_v0, , _stall, ),
	[V1 | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, _lock, _stall, _adaptive),
	[STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, _lock, _stall, _adaptive),
	[V1 | SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v1, , _stall, _adaptive),
	[SINGLE_THREADED | STALL | ADAPTIVE] = POLL_FN_ENTRY(_v0, , _stall, _adaptive),
};

void mlx5_cq_fill_pfns(struct mlx5_cq *cq, const struct ibv_cq_init_attr_ex *cq_attr)
{
	struct mlx5_context *mctx = to_mctx(ibv_cq_ex_to_cq(&cq->ibv_cq)->context);
	const struct op *poll_ops = &ops[((cq->stall_enable && cq->stall_adaptive_enable) ? ADAPTIVE : 0) |
					 (mctx->cqe_version ? V1 : 0) |
					 (cq->flags & MLX5_CQ_FLAGS_SINGLE_THREADED ?
						      SINGLE_THREADED : 0) |
					 (cq->stall_enable ? STALL : 0)];

	cq->ibv_cq.start_poll = poll_ops->start_poll;
	cq->ibv_cq.next_poll = poll_ops->next_poll;
	cq->ibv_cq.end_poll = poll_ops->end_poll;

	cq->ibv_cq.read_opcode = mlx5_cq_read_wc_opcode;
	cq->ibv_cq.read_vendor_err = mlx5_cq_read_wc_vendor_err;
	cq->ibv_cq.read_wc_flags = mlx5_cq_read_wc_flags;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
		cq->ibv_cq.read_byte_len = mlx5_cq_read_wc_byte_len;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_IMM)
		cq->ibv_cq.read_imm_data = mlx5_cq_read_wc_imm_data;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_QP_NUM)
		cq->ibv_cq.read_qp_num = mlx5_cq_read_wc_qp_num;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SRC_QP)
		cq->ibv_cq.read_src_qp = mlx5_cq_read_wc_src_qp;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SLID)
		cq->ibv_cq.read_slid = mlx5_cq_read_wc_slid;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_SL)
		cq->ibv_cq.read_sl = mlx5_cq_read_wc_sl;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_DLID_PATH_BITS)
		cq->ibv_cq.read_dlid_path_bits = mlx5_cq_read_wc_dlid_path_bits;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP)
		cq->ibv_cq.read_completion_ts = mlx5_cq_read_wc_completion_ts;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_CVLAN)
		cq->ibv_cq.read_cvlan = mlx5_cq_read_wc_cvlan;
	if (cq_attr->wc_flags & IBV_WC_EX_WITH_FLOW_TAG)
		cq->ibv_cq.read_flow_tag = mlx5_cq_read_flow_tag;
}

int mlx5_arm_cq(struct ibv_cq *ibvcq, int solicited)
{
	struct mlx5_cq *cq = to_mcq(ibvcq);
	struct mlx5_context *ctx = to_mctx(ibvcq->context);
	uint32_t doorbell[2];
	uint32_t sn;
	uint32_t ci;
	uint32_t cmd;

	sn = cq->arm_sn & 3;
	ci = cq->cons_index & 0xffffff;
	cmd = solicited ? MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT;

	cq->dbrec[MLX5_CQ_ARM_DB] = htobe32(sn << 28 | cmd | ci);

	/*
	 * Make sure that the doorbell record in host memory is
	 * written before ringing the doorbell via PCI WC MMIO.
	 */
	mmio_wc_start();

	doorbell[0] = htobe32(sn << 28 | cmd | ci);
	doorbell[1] = htobe32(cq->cqn);

	mlx5_write64(doorbell, ctx->uar[0] + MLX5_CQ_DOORBELL, &ctx->lock32);

	mmio_flush_writes();

	return 0;
}

void mlx5_cq_event(struct ibv_cq *cq)
{
	to_mcq(cq)->arm_sn++;
}

static int is_equal_rsn(struct mlx5_cqe64 *cqe64, uint32_t rsn)
{
	return rsn == (be32toh(cqe64->sop_drop_qpn) & 0xffffff);
}

static inline int is_equal_uidx(struct mlx5_cqe64 *cqe64, uint32_t uidx)
{
	return uidx == (be32toh(cqe64->srqn_uidx) & 0xffffff);
}

static inline int is_responder(uint8_t opcode)
{
	switch (opcode) {
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
	case MLX5_CQE_RESP_ERR:
		return 1;
	}

	return 0;
}

static inline int free_res_cqe(struct mlx5_cqe64 *cqe64, uint32_t rsn,
			       struct mlx5_srq *srq, int cqe_version)
{
	if (cqe_version) {
		if (is_equal_uidx(cqe64, rsn)) {
			if (srq && is_responder(mlx5dv_get_cqe_opcode(cqe64)))
				mlx5_free_srq_wqe(srq,
						  be16toh(cqe64->wqe_counter));
			return 1;
		}
	} else {
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (be32toh(cqe64->srqn_uidx) & 0xffffff))
				mlx5_free_srq_wqe(srq,
						  be16toh(cqe64->wqe_counter));
			return 1;
		}
	}

	return 0;
}

void __mlx5_cq_clean(struct mlx5_cq *cq, uint32_t rsn, struct mlx5_srq *srq)
{
	uint32_t prod_index;
	int nfreed = 0;
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	uint8_t owner_bit;
	int cqe_version;

	if (!cq || cq->flags & MLX5_CQ_FLAGS_DV_OWNED)
		return;

	/*
	 * First we need to find the current producer index, so we
	 * know where to start cleaning from. It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->cons_index; get_sw_cqe(cq, prod_index); ++prod_index)
		if (prod_index == cq->cons_index + cq->ibv_cq.cqe)
			break;

	/*
	 * Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	cqe_version = (to_mctx(cq->ibv_cq.context))->cqe_version;
	while ((int) --prod_index - (int) cq->cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibv_cq.cqe);
		cqe64 = (cq->cqe_sz == 64) ? cqe : cqe + 64;
		if (free_res_cqe(cqe64, rsn, srq, cqe_version)) {
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibv_cq.cqe);
			dest64 = (cq->cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->cons_index += nfreed;
		/*
		 * Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		udma_to_device_barrier();
		update_cons_index(cq);
	}
}

void mlx5_cq_clean(struct mlx5_cq *cq, uint32_t qpn, struct mlx5_srq *srq)
{
	mlx5_spin_lock(&cq->lock);
	__mlx5_cq_clean(cq, qpn, srq);
	mlx5_spin_unlock(&cq->lock);
}

static uint8_t sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}

static int is_hw(uint8_t own, int n, int mask)
{
	return (own & MLX5_CQE_OWNER_MASK) ^ !!(n & (mask + 1));
}

void mlx5_cq_resize_copy_cqes(struct mlx5_cq *cq)
{
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	uint8_t sw_own;

	ssize = cq->cqe_sz;
	dsize = cq->resize_cqe_sz;

	i = cq->cons_index;
	scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
		fprintf(stderr, "expected cqe in sw ownership\n");
		return;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_buf_cqe(cq->resize_buf, (i + 1) & (cq->resize_cqes - 1), dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_cqes);
		memcpy(dcqe, scqe, ssize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_buf_cqe(cq->active_buf, i & cq->active_cqes, ssize);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (is_hw(scqe64->op_own, i, cq->active_cqes)) {
			fprintf(stderr, "expected cqe in sw ownership\n");
			return;
		}

		if (scqe == start_cqe) {
			fprintf(stderr, "resize CQ failed to get resize CQE\n");
			return;
		}
	}
	++cq->cons_index;
}

int mlx5_alloc_cq_buf(struct mlx5_context *mctx, struct mlx5_cq *cq,
		      struct mlx5_buf *buf, int nent, int cqe_sz)
{
	struct mlx5_cqe64 *cqe;
	int i;
	struct mlx5_device *dev = to_mdev(mctx->ibv_ctx.device);
	int ret;
	enum mlx5_alloc_type type;
	enum mlx5_alloc_type default_type = MLX5_ALLOC_TYPE_ANON;

	if (mlx5_use_huge("HUGE_CQ"))
		default_type = MLX5_ALLOC_TYPE_HUGE;

	mlx5_get_alloc_type(MLX5_CQ_PREFIX, &type, default_type);

	ret = mlx5_alloc_prefered_buf(mctx, buf,
				      align(nent * cqe_sz, dev->page_size),
				      dev->page_size,
				      type,
				      MLX5_CQ_PREFIX);

	if (ret)
		return -1;

	memset(buf->buf, 0, nent * cqe_sz);

	for (i = 0; i < nent; ++i) {
		cqe = buf->buf + i * cqe_sz;
		cqe += cqe_sz == 128 ? 1 : 0;
		cqe->op_own = MLX5_CQE_INVALID << 4;
	}

	return 0;
}

int mlx5_free_cq_buf(struct mlx5_context *ctx, struct mlx5_buf *buf)
{
	return mlx5_free_actual_buf(ctx, buf);
}