1 /* 2 * Copyright (c) 2024, Broadcom. All rights reserved. The term 3 * Broadcom refers to Broadcom Limited and/or its subsidiaries. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in 13 * the documentation and/or other materials provided with the 14 * distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, 18 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 23 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 25 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 26 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/mman.h> 31 32 #include <netinet/in.h> 33 34 #include <assert.h> 35 #include <errno.h> 36 #include <malloc.h> 37 #include <pthread.h> 38 #include <signal.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <unistd.h> 43 44 #include "main.h" 45 #include "verbs.h" 46 47 static int ibv_to_bnxt_re_wr_opcd[11] = { 48 BNXT_RE_WR_OPCD_RDMA_WRITE, 49 BNXT_RE_WR_OPCD_RDMA_WRITE_IMM, 50 BNXT_RE_WR_OPCD_SEND, 51 BNXT_RE_WR_OPCD_SEND_IMM, 52 BNXT_RE_WR_OPCD_RDMA_READ, 53 BNXT_RE_WR_OPCD_ATOMIC_CS, 54 BNXT_RE_WR_OPCD_ATOMIC_FA, 55 BNXT_RE_WR_OPCD_INVAL, 56 BNXT_RE_WR_OPCD_INVAL, 57 BNXT_RE_WR_OPCD_INVAL, 58 BNXT_RE_WR_OPCD_INVAL 59 }; 60 61 static int ibv_wr_to_wc_opcd[11] = { 62 IBV_WC_RDMA_WRITE, 63 IBV_WC_RDMA_WRITE, 64 IBV_WC_SEND, 65 IBV_WC_SEND, 66 IBV_WC_RDMA_READ, 67 IBV_WC_COMP_SWAP, 68 IBV_WC_FETCH_ADD, 69 0xFF, 70 0xFF, 71 0xFF, 72 0xFF 73 }; 74 75 static int bnxt_re_req_to_ibv_status [12] = { 76 IBV_WC_SUCCESS, 77 IBV_WC_BAD_RESP_ERR, 78 IBV_WC_LOC_LEN_ERR, 79 IBV_WC_LOC_QP_OP_ERR, 80 IBV_WC_LOC_PROT_ERR, 81 IBV_WC_MW_BIND_ERR, 82 IBV_WC_REM_INV_REQ_ERR, 83 IBV_WC_REM_ACCESS_ERR, 84 IBV_WC_REM_OP_ERR, 85 IBV_WC_RNR_RETRY_EXC_ERR, 86 IBV_WC_RETRY_EXC_ERR, 87 IBV_WC_WR_FLUSH_ERR 88 }; 89 90 static int bnxt_re_res_to_ibv_status [9] = { 91 IBV_WC_SUCCESS, 92 IBV_WC_LOC_ACCESS_ERR, 93 IBV_WC_LOC_LEN_ERR, 94 IBV_WC_LOC_PROT_ERR, 95 IBV_WC_LOC_QP_OP_ERR, 96 IBV_WC_MW_BIND_ERR, 97 IBV_WC_REM_INV_REQ_ERR, 98 IBV_WC_WR_FLUSH_ERR, 99 IBV_WC_FATAL_ERR 100 }; 101 102 static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc, 103 uint32_t *resize); 104 105 int bnxt_single_threaded; 106 int bnxt_dyn_debug; 107 int bnxt_re_query_device(struct ibv_context *ibvctx, 108 struct ibv_device_attr *dev_attr) 109 { 110 struct ibv_query_device cmd = {}; 111 uint8_t fw_ver[8]; 112 int status; 113 114 memset(dev_attr, 0, sizeof(struct ibv_device_attr)); 115 status = ibv_cmd_query_device(ibvctx, dev_attr, (uint64_t *)&fw_ver, 116 &cmd, sizeof(cmd)); 117 snprintf(dev_attr->fw_ver, 64, "%d.%d.%d.%d", 118 fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3]); 119 120 return status; 121 } 122 123 int bnxt_re_query_device_compat(struct ibv_context *ibvctx, 124 struct ibv_device_attr *dev_attr) 125 126 { 127 int rc = 0; 128 129 rc = bnxt_re_query_device(ibvctx, dev_attr); 130 131 return rc; 132 } 133 134 int bnxt_re_query_port(struct ibv_context *ibvctx, uint8_t port, 135 struct ibv_port_attr *port_attr) 136 { 137 struct ibv_query_port cmd = {}; 138 139 return ibv_cmd_query_port(ibvctx, port, port_attr, &cmd, sizeof(cmd)); 140 } 141 142 static inline bool bnxt_re_is_wcdpi_enabled(struct bnxt_re_context *cntx) 143 { 144 return cntx->comp_mask & BNXT_RE_COMP_MASK_UCNTX_WC_DPI_ENABLED; 145 } 146 147 static int bnxt_re_map_db_page(struct ibv_context *ibvctx, 148 uint64_t dbr, uint32_t dpi, uint32_t wcdpi) 149 { 150 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); 151 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device); 152 153 cntx->udpi.dpindx = dpi; 154 cntx->udpi.dbpage = mmap(NULL, dev->pg_size, PROT_WRITE, 155 MAP_SHARED, ibvctx->cmd_fd, dbr); 156 if (cntx->udpi.dbpage == MAP_FAILED) 157 return -ENOMEM; 158 if (wcdpi) { 159 cntx->udpi.wcdbpg = mmap(NULL, dev->pg_size, PROT_WRITE, 160 MAP_SHARED, ibvctx->cmd_fd, 161 BNXT_RE_MAP_WC); 162 if (cntx->udpi.wcdbpg == MAP_FAILED) 163 return -ENOMEM; 164 cntx->udpi.wcdpi = wcdpi; 165 } 166 167 return 0; 168 } 169 170 struct ibv_pd *bnxt_re_alloc_pd(struct ibv_context *ibvctx) 171 { 172 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); 173 struct bnxt_re_pd_resp resp = {}; 174 struct ibv_alloc_pd cmd = {}; 175 struct bnxt_re_pd *pd; 176 uint64_t dbr_map; 177 178 pd = calloc(1, sizeof(*pd)); 179 if (!pd) 180 return NULL; 181 182 if (ibv_cmd_alloc_pd(ibvctx, &pd->ibvpd, &cmd, sizeof(cmd), 183 &resp.resp, sizeof(resp))) 184 goto out; 185 186 pd->pdid = resp.pdid; 187 /* Map DB page now. */ 188 if (!cntx->udpi.dbpage) { 189 uint32_t wcdpi = 0; 190 191 if (bnxt_re_is_wcdpi_enabled(cntx) && 192 resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_WC_DPI) 193 wcdpi = resp.wcdpi; 194 if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi)) 195 goto fail; 196 if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi) 197 bnxt_re_init_pbuf_list(cntx); 198 } 199 if (resp.comp_mask & BNXT_RE_COMP_MASK_PD_HAS_DBR_BAR_ADDR) { 200 dbr_map = resp.dbr_bar_map & 0xFFFFFFFFFFFFF000; 201 cntx->bar_map = mmap(NULL, 4096, PROT_READ, 202 MAP_SHARED, ibvctx->cmd_fd, dbr_map); 203 if (cntx->bar_map == MAP_FAILED) 204 goto fail; 205 } 206 207 return &pd->ibvpd; 208 fail: 209 ibv_cmd_dealloc_pd(&pd->ibvpd); 210 out: 211 free(pd); 212 return NULL; 213 } 214 215 int bnxt_re_free_pd(struct ibv_pd *ibvpd) 216 { 217 struct bnxt_re_pd *pd = to_bnxt_re_pd(ibvpd); 218 int status; 219 220 status = ibv_cmd_dealloc_pd(ibvpd); 221 if (status) 222 return status; 223 /* DPI un-mapping will be done during uninit_ucontext */ 224 free(pd); 225 226 return 0; 227 } 228 229 struct ibv_mr *get_ibv_mr_from_bnxt_re_mr(struct bnxt_re_mr *mr) 230 { 231 return &mr->vmr; 232 } 233 234 struct ibv_mr *bnxt_re_reg_mr(struct ibv_pd *ibvpd, void *sva, size_t len, 235 int access) 236 { 237 struct bnxt_re_mr_resp resp = {}; 238 struct ibv_reg_mr cmd = {}; 239 struct bnxt_re_mr *mr; 240 uint64_t hw_va; 241 hw_va = (uint64_t) sva; 242 243 mr = calloc(1, sizeof(*mr)); 244 if (!mr) 245 return NULL; 246 247 if (ibv_cmd_reg_mr(ibvpd, sva, len, hw_va, access, &mr->vmr, 248 &cmd, sizeof(cmd), &resp.resp, sizeof(resp))) { 249 free(mr); 250 return NULL; 251 } 252 253 return get_ibv_mr_from_bnxt_re_mr(mr); 254 } 255 256 int bnxt_re_dereg_mr(VERBS_MR *ibvmr) 257 { 258 struct bnxt_re_mr *mr = (struct bnxt_re_mr *)ibvmr; 259 int status; 260 261 status = ibv_cmd_dereg_mr(ibvmr); 262 if (status) 263 return status; 264 free(mr); 265 266 return 0; 267 } 268 269 void *bnxt_re_alloc_cqslab(struct bnxt_re_context *cntx, 270 uint32_t ncqe, uint32_t cur) 271 { 272 struct bnxt_re_mem *mem; 273 uint32_t depth, sz; 274 275 depth = bnxt_re_init_depth(ncqe + 1, cntx->comp_mask); 276 if (depth > cntx->rdev->max_cq_depth + 1) 277 depth = cntx->rdev->max_cq_depth + 1; 278 if (depth == cur) 279 return NULL; 280 sz = get_aligned((depth * cntx->rdev->cqe_size), cntx->rdev->pg_size); 281 mem = bnxt_re_alloc_mem(sz, cntx->rdev->pg_size); 282 if (mem) 283 mem->pad = depth; 284 return mem; 285 } 286 287 struct ibv_cq *_bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe, 288 struct ibv_comp_channel *channel, int vec, 289 bool soft_cq) 290 { 291 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); 292 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device); 293 struct bnxt_re_cq_resp resp = {}; 294 struct bnxt_re_cq_req cmd = {}; 295 struct bnxt_re_cq *cq; 296 bool has_dpi; 297 298 if (ncqe > dev->max_cq_depth) 299 return NULL; 300 301 cq = calloc(1, (sizeof(*cq) + sizeof(struct bnxt_re_queue))); 302 if (!cq) 303 return NULL; 304 cq->cqq = (void *)((char *)cq + sizeof(*cq)); 305 if (!cq->cqq) 306 goto mem; 307 308 cq->mem = bnxt_re_alloc_cqslab(cntx, ncqe, 0); 309 if (!cq->mem) 310 goto mem; 311 cq->cqq->depth = cq->mem->pad; 312 cq->cqq->stride = dev->cqe_size; 313 /* As an exception no need to call get_ring api we know 314 * this is the only consumer 315 */ 316 cq->cqq->va = cq->mem->va_head; 317 if (!cq->cqq->va) 318 goto fail; 319 320 cmd.cq_va = (uint64_t)cq->cqq->va; 321 cmd.cq_handle = (uint64_t)cq; 322 if (soft_cq) { 323 cmd.comp_mask |= BNXT_RE_COMP_MASK_CQ_REQ_HAS_CAP_MASK; 324 cmd.cq_capab |= BNXT_RE_COMP_MASK_CQ_REQ_CAP_DBR_RECOVERY; 325 } 326 if (ibv_cmd_create_cq(ibvctx, ncqe, channel, vec, 327 &cq->ibvcq, &cmd.cmd, sizeof(cmd), 328 &resp.resp, sizeof(resp))) 329 goto fail; 330 331 has_dpi = resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_DB_INFO; 332 if (!cntx->udpi.dbpage && has_dpi) { 333 uint32_t wcdpi = 0; 334 335 if (bnxt_re_is_wcdpi_enabled(cntx) && 336 resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_WC_DPI) 337 wcdpi = resp.wcdpi; 338 if (bnxt_re_map_db_page(ibvctx, resp.dbr, resp.dpi, wcdpi)) 339 goto fail; 340 if (cntx->cctx->chip_is_gen_p5_thor2 && cntx->udpi.wcdpi) 341 bnxt_re_init_pbuf_list(cntx); 342 } 343 344 if (resp.comp_mask & BNXT_RE_COMP_MASK_CQ_HAS_CQ_PAGE) { 345 cq->cq_page = mmap(NULL, dev->pg_size, PROT_WRITE, MAP_SHARED, 346 ibvctx->cmd_fd, resp.cq_page); 347 if (!cq->cq_page) 348 fprintf(stderr, DEV "Valid cq_page not mapped\n"); 349 } 350 351 cq->cqid = resp.cqid; 352 cq->phase = resp.phase; 353 cq->cqq->tail = resp.tail; 354 cq->udpi = &cntx->udpi; 355 cq->first_arm = true; 356 cq->cntx = cntx; 357 cq->rand.seed = cq->cqid; 358 cq->shadow_db_key = BNXT_RE_DB_KEY_INVALID; 359 bnxt_re_dp_spin_init(&cq->cqq->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); 360 INIT_DBLY_LIST_HEAD(&cq->sfhead); 361 INIT_DBLY_LIST_HEAD(&cq->rfhead); 362 INIT_DBLY_LIST_HEAD(&cq->prev_cq_head); 363 if (_is_db_drop_recovery_enable(cntx) && !soft_cq) { 364 INIT_DBLY_LIST_NODE(&cq->dbnode); 365 pthread_spin_lock(&cntx->cq_dbr_res.lock); 366 bnxt_re_list_add_node(&cq->dbnode, &cntx->cq_dbr_res.head); 367 pthread_spin_unlock(&cntx->cq_dbr_res.lock); 368 } 369 370 return &cq->ibvcq; 371 fail: 372 bnxt_re_free_mem(cq->mem); 373 mem: 374 free(cq); 375 return NULL; 376 } 377 378 struct ibv_cq *bnxt_re_create_cq(struct ibv_context *ibvctx, int ncqe, 379 struct ibv_comp_channel *channel, int vec) 380 { 381 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvctx); 382 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvctx->device); 383 sigset_t block_sig_set, old_sig_set; 384 int ret; 385 386 if (_is_db_drop_recovery_enable(cntx) && !cntx->dbr_cq) { 387 cntx->dbr_ev_chan = 388 ibv_create_comp_channel(ibvctx); 389 if (!cntx->dbr_ev_chan) { 390 fprintf(stderr, 391 DEV "Failed to create completion channel\n"); 392 goto free; 393 } 394 cntx->dbr_cq = _bnxt_re_create_cq(ibvctx, 1, cntx->dbr_ev_chan, vec, 1); 395 if (!cntx->dbr_cq) { 396 fprintf(stderr, DEV "Couldn't create CQ\n"); 397 goto free; 398 } 399 cntx->db_recovery_page = mmap(NULL, dev->pg_size, PROT_READ | 400 PROT_WRITE, MAP_SHARED, 401 ibvctx->cmd_fd, BNXT_RE_DB_RECOVERY_PAGE); 402 if (cntx->db_recovery_page == MAP_FAILED) { 403 fprintf(stderr, DEV "Couldn't map DB recovery page\n"); 404 goto free; 405 } 406 /* Create pthread to handle the doorbell drop events. This thread is 407 * not going to handle any signals. Before creation block all the 408 * signals, and after creation restore the old signal mask. 409 */ 410 sigfillset(&block_sig_set); 411 pthread_sigmask(SIG_BLOCK, &block_sig_set, &old_sig_set); 412 ret = pthread_create(&cntx->dbr_thread, NULL, bnxt_re_dbr_thread, cntx); 413 if (ret) { 414 fprintf(stderr, DEV "Couldn't create pthread\n"); 415 pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL); 416 goto free; 417 } 418 pthread_sigmask(SIG_SETMASK, &old_sig_set, NULL); 419 INIT_DBLY_LIST_HEAD(&cntx->qp_dbr_res.head); 420 pthread_spin_init(&cntx->qp_dbr_res.lock, PTHREAD_PROCESS_PRIVATE); 421 INIT_DBLY_LIST_HEAD(&cntx->cq_dbr_res.head); 422 pthread_spin_init(&cntx->cq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE); 423 INIT_DBLY_LIST_HEAD(&cntx->srq_dbr_res.head); 424 pthread_spin_init(&cntx->srq_dbr_res.lock, PTHREAD_PROCESS_PRIVATE); 425 } 426 return(_bnxt_re_create_cq(ibvctx, ncqe, channel, vec, 0)); 427 free: 428 if (cntx->dbr_ev_chan) { 429 ret = ibv_destroy_comp_channel(cntx->dbr_ev_chan); 430 if (ret) 431 fprintf(stderr, DEV "ibv_destroy_comp_channel error\n"); 432 } 433 434 if (cntx->dbr_cq) { 435 if (cntx->db_recovery_page) 436 munmap(cntx->db_recovery_page, dev->pg_size); 437 ret = ibv_destroy_cq(cntx->dbr_cq); 438 if (ret) 439 fprintf(stderr, DEV "ibv_destroy_cq error\n"); 440 } 441 return NULL; 442 } 443 444 int bnxt_re_poll_kernel_cq(struct bnxt_re_cq *cq) 445 { 446 struct ibv_wc tmp_wc; 447 int rc; 448 449 rc = ibv_cmd_poll_cq(&cq->ibvcq, 1, &tmp_wc); 450 if (unlikely(rc)) 451 fprintf(stderr, "ibv_cmd_poll_cq failed: %d\n", rc); 452 return rc; 453 } 454 455 #define BNXT_RE_QUEUE_START_PHASE 0x01 456 457 /* 458 * Function to complete the last steps in CQ resize. Invoke poll function 459 * in the kernel driver; this serves as a signal to the driver to complete CQ 460 * resize steps required. Free memory mapped for the original CQ and switch 461 * over to the memory mapped for CQ with the new size. Finally Ack the Cutoff 462 * CQE. This function must be called under cq->cqq.lock. 463 */ 464 void bnxt_re_resize_cq_complete(struct bnxt_re_cq *cq) 465 { 466 struct bnxt_re_context *cntx = to_bnxt_re_context(cq->ibvcq.context); 467 468 bnxt_re_poll_kernel_cq(cq); 469 bnxt_re_free_mem(cq->mem); 470 471 cq->mem = cq->resize_mem; 472 cq->resize_mem = NULL; 473 /* As an exception no need to call get_ring api we know 474 * this is the only consumer 475 */ 476 cq->cqq->va = cq->mem->va_head; 477 /* 478 * We don't want to memcpy() the entire cqq structure below; otherwise 479 * we'd end up overwriting cq->cqq.lock that is held by the caller. 480 * So we copy the members piecemeal. cqq->head, cqq->tail implicitly 481 * set to 0 before cutoff_ack DB. 482 */ 483 cq->cqq->depth = cq->mem->pad; 484 cq->cqq->stride = cntx->rdev->cqe_size; 485 cq->cqq->head = 0; 486 cq->cqq->tail = 0; 487 cq->phase = BNXT_RE_QUEUE_START_PHASE; 488 /* Reset epoch portion of the flags */ 489 cq->cqq->flags &= ~(BNXT_RE_FLAG_EPOCH_TAIL_MASK | 490 BNXT_RE_FLAG_EPOCH_HEAD_MASK); 491 bnxt_re_ring_cq_arm_db(cq, BNXT_RE_QUE_TYPE_CQ_CUT_ACK); 492 } 493 494 int bnxt_re_resize_cq(struct ibv_cq *ibvcq, int ncqe) 495 { 496 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvcq->context); 497 struct bnxt_re_dev *dev = to_bnxt_re_dev(ibvcq->context->device); 498 struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq); 499 struct bnxt_re_resize_cq_req req = {}; 500 uint32_t exit_cnt = 20; 501 502 struct ibv_resize_cq_resp resp = {}; 503 int rc = 0; 504 505 if (ncqe > dev->max_cq_depth) 506 return -EINVAL; 507 508 bnxt_re_dp_spin_lock(&cq->cqq->qlock); 509 cq->resize_mem = bnxt_re_alloc_cqslab(cntx, ncqe, cq->cqq->depth); 510 if (unlikely(!cq->resize_mem)) { 511 rc = -ENOMEM; 512 goto done; 513 } 514 /* As an exception no need to call get_ring api we know 515 * this is the only consumer 516 */ 517 req.cq_va = (uint64_t)cq->resize_mem->va_head; 518 rc = ibv_cmd_resize_cq(ibvcq, ncqe, &req.cmd, 519 sizeof(req), &resp, sizeof(resp)); 520 if (unlikely(rc)) { 521 bnxt_re_free_mem(cq->resize_mem); 522 goto done; 523 } 524 525 while(true) { 526 struct ibv_wc tmp_wc = {0}; 527 uint32_t resize = 0; 528 int dqed = 0; 529 530 struct bnxt_re_work_compl *compl = NULL; 531 dqed = bnxt_re_poll_one(cq, 1, &tmp_wc, &resize); 532 if (resize) { 533 break; 534 } 535 if (dqed) { 536 compl = calloc(1, sizeof(*compl)); 537 if (unlikely(!compl)) { 538 fprintf(stderr, "%s: No Memory.. Continue\n", __func__); 539 break; 540 } 541 memcpy(&compl->wc, &tmp_wc, sizeof(tmp_wc)); 542 bnxt_re_list_add_node(&compl->cnode, &cq->prev_cq_head); 543 compl = NULL; 544 memset(&tmp_wc, 0, sizeof(tmp_wc)); 545 } else { 546 exit_cnt--; 547 if (unlikely(!exit_cnt)) { 548 rc = -EIO; 549 break; 550 } else { 551 /* wait for 100 milli seconds */ 552 bnxt_re_sub_sec_busy_wait(100 * 1000000); 553 } 554 } 555 } 556 done: 557 bnxt_re_dp_spin_unlock(&cq->cqq->qlock); 558 return rc; 559 } 560 561 static void bnxt_re_destroy_resize_cq_list(struct bnxt_re_cq *cq) 562 { 563 struct bnxt_re_list_node *cur, *tmp; 564 struct bnxt_re_work_compl *compl; 565 566 if (bnxt_re_list_empty(&cq->prev_cq_head)) 567 return; 568 569 list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) { 570 compl = list_node(cur, struct bnxt_re_work_compl, cnode); 571 bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head); 572 free(compl); 573 } 574 575 } 576 577 int bnxt_re_destroy_cq(struct ibv_cq *ibvcq) 578 { 579 struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq); 580 int status; 581 582 if (_is_db_drop_recovery_enable(cq->cntx) && 583 ibvcq != cq->cntx->dbr_cq) { 584 pthread_spin_lock(&cq->cntx->cq_dbr_res.lock); 585 bnxt_re_list_del_node(&cq->dbnode, 586 &cq->cntx->cq_dbr_res.head); 587 pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock); 588 } 589 status = ibv_cmd_destroy_cq(ibvcq); 590 if (status) { 591 if (_is_db_drop_recovery_enable(cq->cntx) && 592 ibvcq != cq->cntx->dbr_cq) { 593 pthread_spin_lock(&cq->cntx->cq_dbr_res.lock); 594 bnxt_re_list_add_node(&cq->dbnode, 595 &cq->cntx->cq_dbr_res.head); 596 pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock); 597 } 598 return status; 599 } 600 bnxt_re_destroy_resize_cq_list(cq); 601 bnxt_re_free_mem(cq->mem); 602 free(cq); 603 return 0; 604 } 605 606 static uint8_t bnxt_re_poll_err_scqe(struct bnxt_re_qp *qp, 607 struct ibv_wc *ibvwc, 608 struct bnxt_re_req_cqe *scqe, 609 uint32_t flg_val, int *cnt) 610 { 611 struct bnxt_re_queue *sq = qp->jsqq->hwque; 612 struct bnxt_re_wrid *swrid; 613 struct bnxt_re_cq *scq; 614 uint8_t status; 615 uint32_t head; 616 617 scq = to_bnxt_re_cq(qp->ibvqp.send_cq); 618 619 head = qp->jsqq->last_idx; 620 swrid = &qp->jsqq->swque[head]; 621 622 *cnt = 1; 623 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & 624 BNXT_RE_BCQE_STATUS_MASK; 625 ibvwc->status = bnxt_re_req_to_ibv_status[status]; 626 ibvwc->wc_flags = 0; 627 ibvwc->wr_id = swrid->wrid; 628 ibvwc->qp_num = qp->qpid; 629 ibvwc->opcode = swrid->wc_opcd; 630 ibvwc->byte_len = 0; 631 632 bnxt_re_incr_head(sq, swrid->slots); 633 bnxt_re_jqq_mod_last(qp->jsqq, head); 634 635 if (qp->qpst != IBV_QPS_ERR) 636 qp->qpst = IBV_QPS_ERR; 637 bnxt_re_list_add_node(&qp->snode, &scq->sfhead); 638 bnxt_re_trace("%s: qp_num = 0x%x status = %d\n", 639 __func__, ibvwc->qp_num, ibvwc->status) 640 641 return false; 642 } 643 644 static uint8_t bnxt_re_poll_success_scqe(struct bnxt_re_qp *qp, 645 struct ibv_wc *ibvwc, 646 struct bnxt_re_req_cqe *scqe, int *cnt) 647 { 648 struct bnxt_re_queue *sq = qp->jsqq->hwque; 649 struct bnxt_re_wrid *swrid; 650 uint8_t pcqe = false; 651 uint32_t cindx, head; 652 653 head = qp->jsqq->last_idx; 654 swrid = &qp->jsqq->swque[head]; 655 cindx = le32toh(scqe->con_indx) % qp->cap.max_swr; 656 657 if (!(swrid->sig & IBV_SEND_SIGNALED)) { 658 *cnt = 0; 659 } else { 660 ibvwc->status = IBV_WC_SUCCESS; 661 ibvwc->wc_flags = 0; 662 ibvwc->qp_num = qp->qpid; 663 ibvwc->wr_id = swrid->wrid; 664 ibvwc->opcode = swrid->wc_opcd; 665 if (ibvwc->opcode == IBV_WC_RDMA_READ || 666 ibvwc->opcode == IBV_WC_COMP_SWAP || 667 ibvwc->opcode == IBV_WC_FETCH_ADD) 668 ibvwc->byte_len = swrid->bytes; 669 *cnt = 1; 670 } 671 bnxt_re_incr_head(sq, swrid->slots); 672 bnxt_re_jqq_mod_last(qp->jsqq, head); 673 if (qp->jsqq->last_idx != cindx) 674 pcqe = true; 675 676 return pcqe; 677 } 678 679 static uint8_t bnxt_re_poll_scqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc, 680 void *cqe, uint32_t flg_val, int *cnt) 681 { 682 uint8_t status, pcqe = false; 683 684 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & 685 BNXT_RE_BCQE_STATUS_MASK; 686 if (status == BNXT_RE_REQ_ST_OK) 687 pcqe = bnxt_re_poll_success_scqe(qp, ibvwc, cqe, cnt); 688 else 689 pcqe = bnxt_re_poll_err_scqe(qp, ibvwc, cqe, flg_val, cnt); 690 691 return pcqe; 692 } 693 694 static void bnxt_re_release_srqe(struct bnxt_re_srq *srq, int tag) 695 { 696 bnxt_re_dp_spin_lock(&srq->srqq->qlock); 697 srq->srwrid[srq->last_idx].next_idx = tag; 698 srq->last_idx = tag; 699 srq->srwrid[srq->last_idx].next_idx = -1; 700 bnxt_re_dp_spin_unlock(&srq->srqq->qlock); 701 } 702 703 static int bnxt_re_poll_err_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc, 704 struct bnxt_re_bcqe *hdr, 705 uint32_t flg_val, void *cqe) 706 { 707 struct bnxt_re_wrid *swque; 708 struct bnxt_re_queue *rq; 709 struct bnxt_re_cq *rcq; 710 uint8_t status, cnt; 711 uint32_t head = 0; 712 713 rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq); 714 715 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & 716 BNXT_RE_BCQE_STATUS_MASK; 717 /* skip h/w flush errors */ 718 if (status == BNXT_RE_RSP_ST_HW_FLUSH) 719 return 0; 720 721 if (!qp->srq) { 722 rq = qp->jrqq->hwque; 723 head = qp->jrqq->last_idx; 724 swque = &qp->jrqq->swque[head]; 725 ibvwc->wr_id = swque->wrid; 726 cnt = swque->slots; 727 } else { 728 struct bnxt_re_srq *srq; 729 int tag; 730 731 srq = qp->srq; 732 rq = srq->srqq; 733 cnt = 1; 734 tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK; 735 ibvwc->wr_id = srq->srwrid[tag].wrid; 736 bnxt_re_release_srqe(srq, tag); 737 } 738 739 ibvwc->status = bnxt_re_res_to_ibv_status[status]; 740 ibvwc->qp_num = qp->qpid; 741 ibvwc->opcode = IBV_WC_RECV; 742 ibvwc->byte_len = 0; 743 ibvwc->wc_flags = 0; 744 if (qp->qptyp == IBV_QPT_UD) 745 ibvwc->src_qp = 0; 746 747 if (!qp->srq) 748 bnxt_re_jqq_mod_last(qp->jrqq, head); 749 bnxt_re_incr_head(rq, cnt); 750 751 if (!qp->srq) 752 bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead); 753 754 bnxt_re_trace("%s: qp_num = 0x%x status = %d\n", 755 __func__, ibvwc->qp_num, ibvwc->status) 756 return 1; 757 } 758 759 static void bnxt_re_fill_ud_cqe(struct ibv_wc *ibvwc, 760 struct bnxt_re_bcqe *hdr, void *cqe, 761 uint8_t flags) 762 { 763 struct bnxt_re_ud_cqe *ucqe = cqe; 764 uint32_t qpid; 765 766 qpid = ((le32toh(hdr->qphi_rwrid) >> BNXT_RE_BCQE_SRCQP_SHIFT) & 767 BNXT_RE_BCQE_SRCQP_SHIFT) << 0x10; /* higher 8 bits of 24 */ 768 qpid |= (le64toh(ucqe->qplo_mac) >> BNXT_RE_UD_CQE_SRCQPLO_SHIFT) & 769 BNXT_RE_UD_CQE_SRCQPLO_MASK; /*lower 16 of 24 */ 770 ibvwc->src_qp = qpid; 771 ibvwc->wc_flags |= IBV_WC_GRH; 772 ibvwc->sl = (flags & BNXT_RE_UD_FLAGS_IP_VER_MASK) >> 773 BNXT_RE_UD_FLAGS_IP_VER_SFT; 774 /*IB-stack ABI in user do not ask for MAC to be reported. */ 775 } 776 777 static void bnxt_re_poll_success_rcqe(struct bnxt_re_qp *qp, 778 struct ibv_wc *ibvwc, 779 struct bnxt_re_bcqe *hdr, 780 uint32_t flg_val, void *cqe) 781 { 782 uint8_t flags, is_imm, is_rdma; 783 struct bnxt_re_rc_cqe *rcqe; 784 struct bnxt_re_wrid *swque; 785 struct bnxt_re_queue *rq; 786 uint32_t head = 0; 787 uint32_t rcqe_len; 788 uint8_t cnt; 789 790 rcqe = cqe; 791 if (!qp->srq) { 792 rq = qp->jrqq->hwque; 793 head = qp->jrqq->last_idx; 794 swque = &qp->jrqq->swque[head]; 795 cnt = swque->slots; 796 ibvwc->wr_id = swque->wrid; 797 } else { 798 struct bnxt_re_srq *srq; 799 int tag; 800 801 srq = qp->srq; 802 rq = srq->srqq; 803 cnt = 1; 804 tag = le32toh(hdr->qphi_rwrid) & BNXT_RE_BCQE_RWRID_MASK; 805 ibvwc->wr_id = srq->srwrid[tag].wrid; 806 bnxt_re_release_srqe(srq, tag); 807 } 808 809 ibvwc->status = IBV_WC_SUCCESS; 810 ibvwc->qp_num = qp->qpid; 811 rcqe_len = le32toh(rcqe->length); 812 ibvwc->byte_len = (qp->qptyp == IBV_QPT_UD) ? 813 rcqe_len & BNXT_RE_UD_CQE_LEN_MASK : rcqe_len; 814 ibvwc->opcode = IBV_WC_RECV; 815 816 flags = (flg_val >> BNXT_RE_BCQE_FLAGS_SHIFT) & 817 BNXT_RE_BCQE_FLAGS_MASK; 818 is_imm = (flags & BNXT_RE_RC_FLAGS_IMM_MASK) >> 819 BNXT_RE_RC_FLAGS_IMM_SHIFT; 820 is_rdma = (flags & BNXT_RE_RC_FLAGS_RDMA_MASK) >> 821 BNXT_RE_RC_FLAGS_RDMA_SHIFT; 822 ibvwc->wc_flags = 0; 823 if (is_imm) { 824 ibvwc->wc_flags |= IBV_WC_WITH_IMM; 825 /* The HW is returning imm_data in little-endian format, 826 * swap to Big Endian as expected by application 827 */ 828 ibvwc->imm_data = htobe32(le32toh(rcqe->imm_key)); 829 if (is_rdma) 830 ibvwc->opcode = IBV_WC_RECV_RDMA_WITH_IMM; 831 } 832 833 if (qp->qptyp == IBV_QPT_UD) { 834 bnxt_re_fill_ud_cqe(ibvwc, hdr, cqe, flags); 835 } 836 837 if (!qp->srq) 838 bnxt_re_jqq_mod_last(qp->jrqq, head); 839 bnxt_re_incr_head(rq, cnt); 840 } 841 842 static uint8_t bnxt_re_poll_rcqe(struct bnxt_re_qp *qp, struct ibv_wc *ibvwc, 843 void *cqe, uint32_t flg_val, int *cnt) 844 { 845 struct bnxt_re_bcqe *hdr; 846 uint8_t status, pcqe = false; 847 848 hdr = cqe + sizeof(struct bnxt_re_rc_cqe); 849 850 status = (flg_val >> BNXT_RE_BCQE_STATUS_SHIFT) & 851 BNXT_RE_BCQE_STATUS_MASK; 852 *cnt = 1; 853 if (status == BNXT_RE_RSP_ST_OK) 854 bnxt_re_poll_success_rcqe(qp, ibvwc, hdr, flg_val, cqe); 855 else 856 *cnt = bnxt_re_poll_err_rcqe(qp, ibvwc, hdr, flg_val, cqe); 857 858 return pcqe; 859 } 860 861 static void bnxt_re_qp_move_flush_err(struct bnxt_re_qp *qp) 862 { 863 struct bnxt_re_cq *scq, *rcq; 864 865 scq = to_bnxt_re_cq(qp->ibvqp.send_cq); 866 rcq = to_bnxt_re_cq(qp->ibvqp.recv_cq); 867 868 if (qp->qpst != IBV_QPS_ERR) 869 qp->qpst = IBV_QPS_ERR; 870 bnxt_re_list_add_node(&qp->rnode, &rcq->rfhead); 871 bnxt_re_list_add_node(&qp->snode, &scq->sfhead); 872 } 873 874 /* Always return false */ 875 static uint8_t bnxt_re_poll_term_cqe(struct bnxt_re_qp *qp, int *cnt) 876 { 877 /* For now just add the QP to flush list without 878 * considering the index reported in the CQE. 879 * Continue reporting flush completions until the 880 * SQ and RQ are empty. 881 */ 882 *cnt = 0; 883 if (qp->qpst != IBV_QPS_RESET) 884 bnxt_re_qp_move_flush_err(qp); 885 886 return false; 887 } 888 889 static int bnxt_re_poll_one(struct bnxt_re_cq *cq, int nwc, struct ibv_wc *wc, 890 uint32_t *resize) 891 { 892 int type, cnt = 0, dqed = 0, hw_polled = 0; 893 struct bnxt_re_queue *cqq = cq->cqq; 894 struct bnxt_re_req_cqe *scqe; 895 struct bnxt_re_ud_cqe *rcqe; 896 uint64_t *qp_handle = NULL; 897 struct bnxt_re_bcqe *hdr; 898 struct bnxt_re_qp *qp; 899 uint8_t pcqe = false; 900 uint32_t flg_val; 901 void *cqe; 902 903 while (nwc) { 904 cqe = cqq->va + cqq->head * bnxt_re_get_cqe_sz(); 905 hdr = cqe + sizeof(struct bnxt_re_req_cqe); 906 flg_val = le32toh(hdr->flg_st_typ_ph); 907 if (unlikely(!bnxt_re_is_cqe_valid(flg_val, cq->phase))) 908 break; 909 type = (flg_val >> BNXT_RE_BCQE_TYPE_SHIFT) & 910 BNXT_RE_BCQE_TYPE_MASK; 911 switch (type) { 912 case BNXT_RE_WC_TYPE_SEND: 913 scqe = cqe; 914 qp_handle = (uint64_t *)&scqe->qp_handle; 915 qp = (struct bnxt_re_qp *) 916 (uintptr_t)le64toh(scqe->qp_handle); 917 if (!qp) 918 break; /*stale cqe. should be rung.*/ 919 pcqe = bnxt_re_poll_scqe(qp, wc, cqe, flg_val, &cnt); 920 break; 921 case BNXT_RE_WC_TYPE_RECV_RC: 922 case BNXT_RE_WC_TYPE_RECV_UD: 923 rcqe = cqe; 924 qp_handle = (uint64_t *)&rcqe->qp_handle; 925 qp = (struct bnxt_re_qp *) 926 (uintptr_t)le64toh(rcqe->qp_handle); 927 if (!qp) 928 break; /*stale cqe. should be rung.*/ 929 pcqe = bnxt_re_poll_rcqe(qp, wc, cqe, flg_val, &cnt); 930 break; 931 case BNXT_RE_WC_TYPE_RECV_RAW: 932 break; 933 case BNXT_RE_WC_TYPE_TERM: 934 scqe = cqe; 935 qp_handle = (uint64_t *)&scqe->qp_handle; 936 qp = (struct bnxt_re_qp *) 937 (uintptr_t)le64toh(scqe->qp_handle); 938 if (!qp) 939 break; 940 pcqe = bnxt_re_poll_term_cqe(qp, &cnt); 941 break; 942 case BNXT_RE_WC_TYPE_COFF: 943 /* Stop further processing and return */ 944 bnxt_re_resize_cq_complete(cq); 945 if (unlikely(resize)) 946 *resize = 1; 947 return dqed; 948 default: 949 break; 950 }; 951 952 if (pcqe) 953 goto skipp_real; 954 955 hw_polled++; 956 if (qp_handle) { 957 *qp_handle = 0x0ULL; /* mark cqe as read */ 958 qp_handle = NULL; 959 } 960 bnxt_re_incr_head(cq->cqq, 1); 961 bnxt_re_change_cq_phase(cq); 962 skipp_real: 963 if (cnt) { 964 cnt = 0; 965 dqed++; 966 nwc--; 967 wc++; 968 } 969 } 970 971 if (likely(hw_polled)) 972 bnxt_re_ring_cq_db(cq); 973 974 return dqed; 975 } 976 977 static int bnxt_re_poll_flush_wcs(struct bnxt_re_joint_queue *jqq, 978 struct ibv_wc *ibvwc, uint32_t qpid, 979 int nwc) 980 { 981 struct bnxt_re_queue *que; 982 struct bnxt_re_wrid *wrid; 983 uint32_t cnt = 0; 984 985 que = jqq->hwque; 986 while(nwc) { 987 if (bnxt_re_is_que_empty(que)) 988 break; 989 wrid = &jqq->swque[jqq->last_idx]; 990 ibvwc->status = IBV_WC_WR_FLUSH_ERR; 991 ibvwc->opcode = wrid->wc_opcd; 992 ibvwc->wr_id = wrid->wrid; 993 ibvwc->qp_num = qpid; 994 ibvwc->byte_len = 0; 995 ibvwc->wc_flags = 0; 996 997 bnxt_re_jqq_mod_last(jqq, jqq->last_idx); 998 bnxt_re_incr_head(que, wrid->slots); 999 nwc--; 1000 cnt++; 1001 ibvwc++; 1002 } 1003 1004 return cnt; 1005 } 1006 1007 static int bnxt_re_poll_flush_wqes(struct bnxt_re_cq *cq, 1008 struct bnxt_re_list_head *lhead, 1009 struct ibv_wc *ibvwc, 1010 uint32_t nwc) 1011 { 1012 struct bnxt_re_list_node *cur, *tmp; 1013 struct bnxt_re_joint_queue *jqq; 1014 struct bnxt_re_qp *qp; 1015 bool sq_list = false; 1016 uint32_t polled = 0; 1017 1018 sq_list = (lhead == &cq->sfhead) ? true : false; 1019 if (!bnxt_re_list_empty(lhead)) { 1020 list_for_each_node_safe(cur, tmp, lhead) { 1021 if (sq_list) { 1022 qp = list_node(cur, struct bnxt_re_qp, snode); 1023 jqq = qp->jsqq; 1024 } else { 1025 qp = list_node(cur, struct bnxt_re_qp, rnode); 1026 jqq = qp->jrqq; 1027 if (!jqq) /* Using srq no need to flush */ 1028 goto done; 1029 } 1030 1031 if (bnxt_re_is_que_empty(jqq->hwque)) 1032 continue; 1033 polled += bnxt_re_poll_flush_wcs(jqq, ibvwc + polled, 1034 qp->qpid, nwc - polled); 1035 if (!(nwc - polled)) 1036 break; 1037 } 1038 } 1039 done: 1040 return polled; 1041 } 1042 1043 static int bnxt_re_poll_flush_lists(struct bnxt_re_cq *cq, uint32_t nwc, 1044 struct ibv_wc *ibvwc) 1045 { 1046 int left, polled = 0; 1047 1048 polled = bnxt_re_poll_flush_wqes(cq, &cq->sfhead, ibvwc, nwc); 1049 left = nwc - polled; 1050 1051 if (!left) 1052 return polled; 1053 1054 polled += bnxt_re_poll_flush_wqes(cq, &cq->rfhead, 1055 ibvwc + polled, left); 1056 return polled; 1057 } 1058 1059 static int bnxt_re_poll_resize_cq_list(struct bnxt_re_cq *cq, uint32_t nwc, 1060 struct ibv_wc *ibvwc) 1061 { 1062 struct bnxt_re_list_node *cur, *tmp; 1063 struct bnxt_re_work_compl *compl; 1064 int left; 1065 1066 left = nwc; 1067 list_for_each_node_safe(cur, tmp, &cq->prev_cq_head) { 1068 compl = list_node(cur, struct bnxt_re_work_compl, cnode); 1069 if (!left) 1070 break; 1071 memcpy(ibvwc, &compl->wc, sizeof(*ibvwc)); 1072 ibvwc++; 1073 left--; 1074 bnxt_re_list_del_node(&compl->cnode, &cq->prev_cq_head); 1075 free(compl); 1076 } 1077 1078 return nwc - left; 1079 } 1080 1081 1082 int bnxt_re_poll_cq(struct ibv_cq *ibvcq, int nwc, struct ibv_wc *wc) 1083 { 1084 int dqed = 0, left = 0; 1085 struct bnxt_re_cq *cq; 1086 uint32_t resize = 0; 1087 1088 cq = container_of(ibvcq, struct bnxt_re_cq, ibvcq); 1089 bnxt_re_dp_spin_lock(&cq->cqq->qlock); 1090 1091 left = nwc; 1092 /* Check whether we have anything to be completed from prev cq context */ 1093 if (unlikely(!bnxt_re_list_empty(&cq->prev_cq_head))) { 1094 dqed = bnxt_re_poll_resize_cq_list(cq, nwc, wc); 1095 left = nwc - dqed; 1096 if (!left) { 1097 bnxt_re_dp_spin_unlock(&cq->cqq->qlock); 1098 return dqed; 1099 } 1100 } 1101 1102 dqed += bnxt_re_poll_one(cq, left, wc + dqed, &resize); 1103 /* Check if anything is there to flush. */ 1104 left = nwc - dqed; 1105 if (left && (!bnxt_re_list_empty(&cq->sfhead) || 1106 !bnxt_re_list_empty(&cq->rfhead))) 1107 dqed += bnxt_re_poll_flush_lists(cq, left, (wc + dqed)); 1108 bnxt_re_dp_spin_unlock(&cq->cqq->qlock); 1109 1110 return dqed; 1111 } 1112 1113 void bnxt_re_cleanup_cq(struct bnxt_re_qp *qp, struct bnxt_re_cq *cq) 1114 { 1115 struct bnxt_re_queue *que = cq->cqq; 1116 struct bnxt_re_req_cqe *scqe; 1117 struct bnxt_re_rc_cqe *rcqe; 1118 struct bnxt_re_bcqe *hdr; 1119 int indx, type; 1120 void *cqe; 1121 1122 1123 bnxt_re_dp_spin_lock(&que->qlock); 1124 for(indx = 0; indx < que->depth; indx++) { 1125 cqe = que->va + indx * bnxt_re_get_cqe_sz(); 1126 hdr = cqe + sizeof(struct bnxt_re_req_cqe); 1127 type = (hdr->flg_st_typ_ph >> BNXT_RE_BCQE_TYPE_SHIFT) & 1128 BNXT_RE_BCQE_TYPE_MASK; 1129 1130 if (type == BNXT_RE_WC_TYPE_COFF) 1131 continue; 1132 if (type == BNXT_RE_WC_TYPE_SEND || 1133 type == BNXT_RE_WC_TYPE_TERM) { 1134 scqe = cqe; 1135 if (scqe->qp_handle == (uint64_t)qp) 1136 scqe->qp_handle = 0ULL; 1137 } else { 1138 rcqe = cqe; 1139 if (rcqe->qp_handle == (uint64_t)qp) 1140 rcqe->qp_handle = 0ULL; 1141 } 1142 1143 } 1144 1145 if (_is_db_drop_recovery_enable(cq->cntx)) { 1146 pthread_spin_lock(&cq->cntx->cq_dbr_res.lock); 1147 bnxt_re_list_del_node(&cq->dbnode, &cq->cntx->cq_dbr_res.head); 1148 pthread_spin_unlock(&cq->cntx->cq_dbr_res.lock); 1149 } 1150 bnxt_re_list_del_node(&qp->snode, &cq->sfhead); 1151 bnxt_re_list_del_node(&qp->rnode, &cq->rfhead); 1152 bnxt_re_dp_spin_unlock(&que->qlock); 1153 } 1154 1155 void bnxt_re_cq_event(struct ibv_cq *ibvcq) 1156 { 1157 1158 } 1159 1160 int bnxt_re_arm_cq(struct ibv_cq *ibvcq, int flags) 1161 { 1162 struct bnxt_re_cq *cq = to_bnxt_re_cq(ibvcq); 1163 1164 bnxt_re_dp_spin_lock(&cq->cqq->qlock); 1165 flags = !flags ? BNXT_RE_QUE_TYPE_CQ_ARMALL : 1166 BNXT_RE_QUE_TYPE_CQ_ARMSE; 1167 1168 bnxt_re_ring_cq_arm_db(cq, flags); 1169 bnxt_re_dp_spin_unlock(&cq->cqq->qlock); 1170 1171 return 0; 1172 } 1173 1174 static int bnxt_re_check_qp_limits(struct bnxt_re_context *cntx, 1175 struct ibv_qp_init_attr *attr) 1176 { 1177 struct ibv_device_attr *devattr; 1178 struct bnxt_re_dev *rdev; 1179 1180 rdev = cntx->rdev; 1181 devattr = &rdev->devattr; 1182 if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) 1183 return EINVAL; 1184 if (attr->cap.max_send_sge > devattr->max_sge) 1185 return EINVAL; 1186 if (attr->cap.max_recv_sge > devattr->max_sge) 1187 return EINVAL; 1188 if (cntx->modes & BNXT_RE_WQE_MODE_VARIABLE) { 1189 if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE_VAR_WQE) 1190 return -EINVAL; 1191 } else if (attr->cap.max_inline_data > BNXT_RE_MAX_INLINE_SIZE) { 1192 return EINVAL; 1193 } 1194 if (attr->cap.max_send_wr > devattr->max_qp_wr) 1195 attr->cap.max_send_wr = devattr->max_qp_wr; 1196 if (attr->cap.max_recv_wr > devattr->max_qp_wr) 1197 attr->cap.max_recv_wr = devattr->max_qp_wr; 1198 1199 return 0; 1200 } 1201 1202 static int bnxt_re_get_rq_slots(struct bnxt_re_dev *rdev, uint8_t qpmode, 1203 uint32_t nrwr, uint32_t nsge, uint32_t *esz) 1204 { 1205 uint32_t max_wqesz; 1206 uint32_t wqe_size; 1207 uint32_t stride; 1208 uint32_t slots; 1209 1210 stride = sizeof(struct bnxt_re_sge); 1211 max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge); 1212 1213 wqe_size = bnxt_re_calc_wqe_sz(nsge); 1214 if (wqe_size > max_wqesz) 1215 return -EINVAL; 1216 1217 if (qpmode == BNXT_RE_WQE_MODE_STATIC) 1218 wqe_size = bnxt_re_calc_wqe_sz(6); 1219 1220 if (esz) 1221 *esz = wqe_size; 1222 1223 slots = (nrwr * wqe_size) / stride; 1224 return slots; 1225 } 1226 1227 static int bnxt_re_get_sq_slots(struct bnxt_re_dev *rdev, 1228 uint8_t qpmode, uint32_t nswr, 1229 uint32_t nsge, uint32_t ils, uint32_t *esize) 1230 { 1231 uint32_t max_wqesz; 1232 uint32_t wqe_size; 1233 uint32_t cal_ils; 1234 uint32_t stride; 1235 uint32_t ilsize; 1236 uint32_t hdr_sz; 1237 uint32_t slots; 1238 1239 hdr_sz = bnxt_re_get_sqe_hdr_sz(); 1240 stride = sizeof(struct bnxt_re_sge); 1241 max_wqesz = bnxt_re_calc_wqe_sz(rdev->devattr.max_sge); 1242 ilsize = get_aligned(ils, hdr_sz); 1243 1244 wqe_size = bnxt_re_calc_wqe_sz(nsge); 1245 if (ilsize) { 1246 cal_ils = hdr_sz + ilsize; 1247 wqe_size = MAX(cal_ils, wqe_size); 1248 wqe_size = get_aligned(wqe_size, hdr_sz); 1249 } 1250 if (wqe_size > max_wqesz) 1251 return -EINVAL; 1252 1253 if (qpmode == BNXT_RE_WQE_MODE_STATIC) 1254 wqe_size = bnxt_re_calc_wqe_sz(6); 1255 1256 if (esize) 1257 *esize = wqe_size; 1258 slots = (nswr * wqe_size) / stride; 1259 return slots; 1260 } 1261 1262 static int bnxt_re_get_sqmem_size(struct bnxt_re_context *cntx, 1263 struct ibv_qp_init_attr *attr, 1264 struct bnxt_re_qattr *qattr) 1265 { 1266 uint32_t nsge, nswr, diff = 0; 1267 size_t bytes = 0; 1268 uint32_t npsn; 1269 uint32_t ils; 1270 uint8_t mode; 1271 uint32_t esz; 1272 int nslots; 1273 1274 mode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE; 1275 nsge = attr->cap.max_send_sge; 1276 diff = bnxt_re_get_diff(cntx->comp_mask); 1277 nswr = attr->cap.max_send_wr + 1 + diff; 1278 nswr = bnxt_re_init_depth(nswr, cntx->comp_mask); 1279 ils = attr->cap.max_inline_data; 1280 nslots = bnxt_re_get_sq_slots(cntx->rdev, mode, nswr, 1281 nsge, ils, &esz); 1282 if (nslots < 0) 1283 return nslots; 1284 npsn = bnxt_re_get_npsn(mode, nswr, nslots); 1285 if (BNXT_RE_HW_RETX(cntx)) 1286 npsn = roundup_pow_of_two(npsn); 1287 1288 qattr->nwr = nswr; 1289 qattr->slots = nslots; 1290 qattr->esize = esz; 1291 1292 bytes = nslots * sizeof(struct bnxt_re_sge); /* ring */ 1293 bytes += npsn * bnxt_re_get_psne_size(cntx); /* psn */ 1294 qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size); 1295 qattr->sz_shad = nswr * sizeof(struct bnxt_re_wrid); /* shadow */ 1296 return 0; 1297 } 1298 1299 static int bnxt_re_get_rqmem_size(struct bnxt_re_context *cntx, 1300 struct ibv_qp_init_attr *attr, 1301 struct bnxt_re_qattr *qattr) 1302 { 1303 uint32_t nrwr, nsge; 1304 size_t bytes = 0; 1305 uint32_t esz; 1306 int nslots; 1307 1308 nsge = attr->cap.max_recv_sge; 1309 nrwr = attr->cap.max_recv_wr + 1; 1310 nrwr = bnxt_re_init_depth(nrwr, cntx->comp_mask); 1311 nslots = bnxt_re_get_rq_slots(cntx->rdev, cntx->modes, 1312 nrwr, nsge, &esz); 1313 if (nslots < 0) 1314 return nslots; 1315 qattr->nwr = nrwr; 1316 qattr->slots = nslots; 1317 qattr->esize = esz; 1318 1319 bytes = nslots * sizeof(struct bnxt_re_sge); 1320 qattr->sz_ring = get_aligned(bytes, cntx->rdev->pg_size); 1321 qattr->sz_shad = nrwr * sizeof(struct bnxt_re_wrid); 1322 return 0; 1323 } 1324 1325 static int bnxt_re_get_qpmem_size(struct bnxt_re_context *cntx, 1326 struct ibv_qp_init_attr *attr, 1327 struct bnxt_re_qattr *qattr) 1328 { 1329 int size = 0; 1330 int tmp; 1331 int rc; 1332 1333 size = sizeof(struct bnxt_re_qp); 1334 tmp = sizeof(struct bnxt_re_joint_queue); 1335 tmp += sizeof(struct bnxt_re_queue); 1336 size += tmp; 1337 1338 rc = bnxt_re_get_sqmem_size(cntx, attr, &qattr[BNXT_RE_QATTR_SQ_INDX]); 1339 if (rc < 0) 1340 return -EINVAL; 1341 size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_ring; 1342 size += qattr[BNXT_RE_QATTR_SQ_INDX].sz_shad; 1343 1344 if (!attr->srq) { 1345 tmp = sizeof(struct bnxt_re_joint_queue); 1346 tmp += sizeof(struct bnxt_re_queue); 1347 size += tmp; 1348 rc = bnxt_re_get_rqmem_size(cntx, attr, 1349 &qattr[BNXT_RE_QATTR_RQ_INDX]); 1350 if (rc < 0) 1351 return -EINVAL; 1352 size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_ring; 1353 size += qattr[BNXT_RE_QATTR_RQ_INDX].sz_shad; 1354 } 1355 return size; 1356 } 1357 1358 static void *bnxt_re_alloc_qpslab(struct bnxt_re_context *cntx, 1359 struct ibv_qp_init_attr *attr, 1360 struct bnxt_re_qattr *qattr) 1361 { 1362 int bytes; 1363 1364 bytes = bnxt_re_get_qpmem_size(cntx, attr, qattr); 1365 if (bytes < 0) 1366 return NULL; 1367 return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size); 1368 } 1369 1370 static int bnxt_re_alloc_queue_ptr(struct bnxt_re_qp *qp, 1371 struct ibv_qp_init_attr *attr) 1372 { 1373 int rc = -ENOMEM; 1374 int jqsz, qsz; 1375 1376 jqsz = sizeof(struct bnxt_re_joint_queue); 1377 qsz = sizeof(struct bnxt_re_queue); 1378 qp->jsqq = bnxt_re_get_obj(qp->mem, jqsz); 1379 if (!qp->jsqq) 1380 return rc; 1381 qp->jsqq->hwque = bnxt_re_get_obj(qp->mem, qsz); 1382 if (!qp->jsqq->hwque) 1383 goto fail; 1384 1385 if (!attr->srq) { 1386 qp->jrqq = bnxt_re_get_obj(qp->mem, jqsz); 1387 if (!qp->jrqq) 1388 goto fail; 1389 qp->jrqq->hwque = bnxt_re_get_obj(qp->mem, qsz); 1390 if (!qp->jrqq->hwque) 1391 goto fail; 1392 } 1393 1394 return 0; 1395 fail: 1396 return rc; 1397 } 1398 1399 static int bnxt_re_alloc_init_swque(struct bnxt_re_joint_queue *jqq, 1400 struct bnxt_re_mem *mem, 1401 struct bnxt_re_qattr *qattr) 1402 { 1403 int indx; 1404 1405 jqq->swque = bnxt_re_get_obj(mem, qattr->sz_shad); 1406 if (!jqq->swque) 1407 return -ENOMEM; 1408 jqq->start_idx = 0; 1409 jqq->last_idx = qattr->nwr - 1; 1410 for (indx = 0; indx < qattr->nwr; indx++) 1411 jqq->swque[indx].next_idx = indx + 1; 1412 jqq->swque[jqq->last_idx].next_idx = 0; 1413 jqq->last_idx = 0; 1414 1415 return 0; 1416 } 1417 1418 static inline int bnxt_log2(int n) 1419 { 1420 int t; 1421 1422 if (n <= 0) 1423 return -1; 1424 1425 t = 0; 1426 while ((1 << t) < n) 1427 ++t; 1428 1429 return t; 1430 } 1431 1432 static int bnxt_re_alloc_queues(struct bnxt_re_qp *qp, 1433 struct ibv_qp_init_attr *attr, 1434 struct bnxt_re_qattr *qattr) 1435 { 1436 struct bnxt_re_context *cntx; 1437 struct bnxt_re_queue *que; 1438 uint32_t psn_size; 1439 uint8_t indx; 1440 int ret; 1441 1442 cntx = qp->cntx; 1443 1444 indx = BNXT_RE_QATTR_SQ_INDX; 1445 que = qp->jsqq->hwque; 1446 que->stride = sizeof(struct bnxt_re_sge); 1447 que->depth = qattr[indx].slots; 1448 que->diff = (bnxt_re_get_diff(cntx->comp_mask) * qattr[indx].esize) / 1449 que->stride; 1450 que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring); 1451 if (!que->va) 1452 return -ENOMEM; 1453 /* PSN-search memory is allocated without checking for 1454 * QP-Type. Kernel driver do not map this memory if it 1455 * is UD-qp. UD-qp use this memory to maintain WC-opcode. 1456 * See definition of bnxt_re_fill_psns() for the use case. 1457 */ 1458 que->pad = (que->va + que->depth * que->stride); 1459 psn_size = bnxt_re_get_psne_size(qp->cntx); 1460 que->pad_stride_log2 = (uint32_t)bnxt_log2((double)psn_size); 1461 1462 ret = bnxt_re_alloc_init_swque(qp->jsqq, qp->mem, &qattr[indx]); 1463 if (ret) 1464 goto fail; 1465 1466 qp->cap.max_swr = qattr[indx].nwr; 1467 qp->jsqq->cntx = qp->cntx; 1468 que->dbtail = (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE) ? 1469 &que->tail : &qp->jsqq->start_idx; 1470 1471 /* Init and adjust MSN table size according to qp mode */ 1472 if (!BNXT_RE_HW_RETX(qp->cntx)) 1473 goto skip_msn; 1474 que->msn = 0; 1475 que->msn_tbl_sz = 0; 1476 if (qp->qpmode & BNXT_RE_WQE_MODE_VARIABLE) 1477 que->msn_tbl_sz = roundup_pow_of_two(qattr->slots) / 2; 1478 else 1479 que->msn_tbl_sz = roundup_pow_of_two(qattr->nwr); 1480 skip_msn: 1481 bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); 1482 1483 if (qp->jrqq) { 1484 indx = BNXT_RE_QATTR_RQ_INDX; 1485 que = qp->jrqq->hwque; 1486 que->stride = sizeof(struct bnxt_re_sge); 1487 que->depth = qattr[indx].slots; 1488 que->max_slots = qattr[indx].esize / que->stride; 1489 que->dbtail = &qp->jrqq->start_idx; 1490 que->va = bnxt_re_get_ring(qp->mem, qattr[indx].sz_ring); 1491 if (!que->va) 1492 return -ENOMEM; 1493 /* For RQ only bnxt_re_wri.wrid is used. */ 1494 ret = bnxt_re_alloc_init_swque(qp->jrqq, qp->mem, &qattr[indx]); 1495 if (ret) 1496 goto fail; 1497 1498 bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); 1499 qp->cap.max_rwr = qattr[indx].nwr; 1500 qp->jrqq->cntx = qp->cntx; 1501 } 1502 1503 return 0; 1504 fail: 1505 return ret; 1506 } 1507 1508 void bnxt_re_async_event(struct ibv_async_event *event) 1509 { 1510 struct ibv_qp *ibvqp; 1511 struct bnxt_re_qp *qp; 1512 1513 switch (event->event_type) { 1514 case IBV_EVENT_CQ_ERR: 1515 break; 1516 case IBV_EVENT_SRQ_ERR: 1517 case IBV_EVENT_QP_FATAL: 1518 case IBV_EVENT_QP_REQ_ERR: 1519 case IBV_EVENT_QP_ACCESS_ERR: 1520 case IBV_EVENT_PATH_MIG_ERR: { 1521 ibvqp = event->element.qp; 1522 qp = to_bnxt_re_qp(ibvqp); 1523 bnxt_re_qp_move_flush_err(qp); 1524 break; 1525 } 1526 case IBV_EVENT_SQ_DRAINED: 1527 case IBV_EVENT_PATH_MIG: 1528 case IBV_EVENT_COMM_EST: 1529 case IBV_EVENT_QP_LAST_WQE_REACHED: 1530 case IBV_EVENT_SRQ_LIMIT_REACHED: 1531 case IBV_EVENT_PORT_ACTIVE: 1532 case IBV_EVENT_PORT_ERR: 1533 default: 1534 break; 1535 } 1536 } 1537 1538 struct ibv_qp *bnxt_re_create_qp(struct ibv_pd *ibvpd, 1539 struct ibv_qp_init_attr *attr) 1540 { 1541 struct bnxt_re_context *cntx = to_bnxt_re_context(ibvpd->context); 1542 struct bnxt_re_qp_resp resp = {}; 1543 struct ibv_device_attr *devattr; 1544 struct bnxt_re_qp_req req = {}; 1545 struct bnxt_re_qattr qattr[2]; 1546 struct bnxt_re_qpcap *cap; 1547 struct bnxt_re_dev *rdev; 1548 struct bnxt_re_qp *qp; 1549 void *mem; 1550 1551 if (bnxt_re_check_qp_limits(cntx, attr)) 1552 return NULL; 1553 1554 memset(qattr, 0, (2 * sizeof(*qattr))); 1555 mem = bnxt_re_alloc_qpslab(cntx, attr, qattr); 1556 if (!mem) 1557 return NULL; 1558 qp = bnxt_re_get_obj(mem, sizeof(*qp)); 1559 if (!qp) 1560 goto fail; 1561 qp->mem = mem; 1562 1563 qp->cctx = cntx->cctx; 1564 1565 qp->cntx = cntx; 1566 qp->qpmode = cntx->modes & BNXT_RE_WQE_MODE_VARIABLE; 1567 /* alloc queue pointers */ 1568 if (bnxt_re_alloc_queue_ptr(qp, attr)) 1569 goto fail; 1570 /* alloc queues */ 1571 if (bnxt_re_alloc_queues(qp, attr, qattr)) 1572 goto fail; 1573 /* Fill ibv_cmd */ 1574 cap = &qp->cap; 1575 req.qpsva = (uint64_t)qp->jsqq->hwque->va; 1576 req.qprva = qp->jrqq ? (uint64_t)qp->jrqq->hwque->va : 0; 1577 req.qp_handle = (uint64_t)qp; 1578 1579 if (ibv_cmd_create_qp(ibvpd, &qp->ibvqp, attr, &req.cmd, sizeof(req), 1580 &resp.resp, sizeof(resp))) 1581 goto fail; 1582 1583 qp->qpid = resp.qpid; 1584 qp->qptyp = attr->qp_type; 1585 qp->qpst = IBV_QPS_RESET; 1586 qp->scq = to_bnxt_re_cq(attr->send_cq); 1587 qp->rcq = to_bnxt_re_cq(attr->recv_cq); 1588 if (attr->srq) 1589 qp->srq = to_bnxt_re_srq(attr->srq); 1590 qp->udpi = &cntx->udpi; 1591 qp->rand.seed = qp->qpid; 1592 qp->sq_shadow_db_key = BNXT_RE_DB_KEY_INVALID; 1593 qp->rq_shadow_db_key = BNXT_RE_DB_KEY_INVALID; 1594 qp->sq_msn = 0; 1595 1596 rdev = cntx->rdev; 1597 devattr = &rdev->devattr; 1598 cap->max_ssge = attr->cap.max_send_sge; 1599 cap->max_rsge = attr->cap.max_recv_sge; 1600 cap->max_inline = attr->cap.max_inline_data; 1601 cap->sqsig = attr->sq_sig_all; 1602 cap->is_atomic_cap = devattr->atomic_cap; 1603 INIT_DBLY_LIST_NODE(&qp->snode); 1604 INIT_DBLY_LIST_NODE(&qp->rnode); 1605 INIT_DBLY_LIST_NODE(&qp->dbnode); 1606 1607 /* For SR2, push will be negotiated at modify qp */ 1608 if (_is_chip_gen_p5(qp->cctx) && cntx->udpi.wcdpi) { 1609 qp->push_st_en = 1; 1610 qp->max_push_sz = BNXT_RE_MAX_INLINE_SIZE; 1611 } 1612 1613 if (_is_db_drop_recovery_enable(cntx)) { 1614 pthread_spin_lock(&cntx->qp_dbr_res.lock); 1615 bnxt_re_list_add_node(&qp->dbnode, &cntx->qp_dbr_res.head); 1616 pthread_spin_unlock(&cntx->qp_dbr_res.lock); 1617 } 1618 return &qp->ibvqp; 1619 fail: 1620 bnxt_re_free_mem(mem); 1621 return NULL; 1622 } 1623 1624 int bnxt_re_modify_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr, 1625 int attr_mask) 1626 { 1627 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); 1628 int rc; 1629 1630 struct bnxt_re_modify_ex_resp resp = {}; 1631 struct bnxt_re_modify_ex_req req = {}; 1632 bool can_issue_mqp_ex = false; 1633 1634 if (bnxt_re_is_mqp_ex_supported(qp->cntx)) { 1635 can_issue_mqp_ex = true; 1636 /* Request for PPP */ 1637 if (can_request_ppp(qp, attr, attr_mask)) { 1638 req.comp_mask |= BNXT_RE_MQP_PPP_REQ_EN; 1639 req.dpi = qp->udpi->wcdpi; 1640 } 1641 if (attr_mask & IBV_QP_PATH_MTU) 1642 req.comp_mask |= BNXT_RE_MQP_PATH_MTU_MASK; 1643 } 1644 rc = ibv_cmd_modify_qp_compat(ibvqp, attr, attr_mask, 1645 can_issue_mqp_ex, &req, &resp); 1646 if (!rc) { 1647 if (attr_mask & IBV_QP_STATE) { 1648 qp->qpst = attr->qp_state; 1649 /* transition to reset */ 1650 if (qp->qpst == IBV_QPS_RESET) { 1651 qp->jsqq->hwque->head = 0; 1652 qp->jsqq->hwque->tail = 0; 1653 *qp->jsqq->hwque->dbtail = 0; 1654 qp->jsqq->start_idx = 0; 1655 qp->jsqq->last_idx = 0; 1656 bnxt_re_cleanup_cq(qp, qp->scq); 1657 if (qp->jrqq) { 1658 qp->jrqq->hwque->head = 0; 1659 qp->jrqq->hwque->tail = 0; 1660 *qp->jrqq->hwque->dbtail = 0; 1661 qp->jrqq->start_idx = 0; 1662 qp->jrqq->last_idx = 0; 1663 bnxt_re_cleanup_cq(qp, qp->rcq); 1664 } 1665 } 1666 /* Copy if PUSH was enabled */ 1667 if (resp.comp_mask & BNXT_RE_MQP_PPP_REQ_EN_MASK) { 1668 qp->push_st_en = BNXT_RE_MQP_PPP_REQ_EN; 1669 /* Set the next posting state 1670 * based on current h/w state 1671 */ 1672 qp->push_st_en |= 1673 !(!!(resp.ppp_st_idx & 1674 BNXT_RE_MQP_PPP_STATE)) << 1675 BNXT_RE_PPP_ST_SHIFT; 1676 qp->ppp_idx = 1677 (resp.ppp_st_idx & 1678 BNXT_RE_MQP_PPP_IDX_MASK); 1679 if (qp->qpmode == BNXT_RE_WQE_MODE_VARIABLE) 1680 qp->max_push_sz = 1681 BNXT_RE_MAX_PUSH_SIZE_VAR_WQE; 1682 else 1683 qp->max_push_sz = 1684 BNXT_RE_MAX_INLINE_SIZE; 1685 } 1686 } 1687 1688 if (attr_mask & IBV_QP_SQ_PSN) 1689 qp->sq_psn = attr->sq_psn; 1690 1691 if (resp.comp_mask & BNXT_RE_MQP_PATH_MTU_MASK) 1692 qp->mtu = resp.path_mtu; 1693 else if (attr_mask & IBV_QP_PATH_MTU) 1694 qp->mtu = (0x80 << attr->path_mtu); 1695 } 1696 1697 return rc; 1698 } 1699 1700 int bnxt_re_query_qp(struct ibv_qp *ibvqp, struct ibv_qp_attr *attr, 1701 int attr_mask, struct ibv_qp_init_attr *init_attr) 1702 { 1703 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); 1704 struct ibv_query_qp cmd = {}; 1705 int rc; 1706 1707 rc = ibv_cmd_query_qp(ibvqp, attr, attr_mask, init_attr, 1708 &cmd, sizeof(cmd)); 1709 if (!rc) 1710 qp->qpst = ibvqp->state; 1711 1712 return rc; 1713 } 1714 1715 int bnxt_re_destroy_qp(struct ibv_qp *ibvqp) 1716 { 1717 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); 1718 struct bnxt_re_mem *mem; 1719 int status; 1720 1721 qp->qpst = IBV_QPS_RESET; 1722 if (_is_db_drop_recovery_enable(qp->cntx)) { 1723 pthread_spin_lock(&qp->cntx->qp_dbr_res.lock); 1724 bnxt_re_list_del_node(&qp->dbnode, &qp->cntx->qp_dbr_res.head); 1725 pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock); 1726 } 1727 status = ibv_cmd_destroy_qp(ibvqp); 1728 if (status) { 1729 if (_is_db_drop_recovery_enable(qp->cntx)) { 1730 pthread_spin_lock(&qp->cntx->qp_dbr_res.lock); 1731 bnxt_re_list_add_node(&qp->dbnode, 1732 &qp->cntx->qp_dbr_res.head); 1733 pthread_spin_unlock(&qp->cntx->qp_dbr_res.lock); 1734 } 1735 return status; 1736 } 1737 bnxt_re_cleanup_cq(qp, qp->rcq); 1738 bnxt_re_cleanup_cq(qp, qp->scq); 1739 mem = qp->mem; 1740 bnxt_re_free_mem(mem); 1741 return 0; 1742 } 1743 1744 static void bnxt_re_put_rx_sge(struct bnxt_re_queue *que, uint32_t *idx, 1745 struct ibv_sge *sgl, int nsg) 1746 { 1747 struct bnxt_re_sge *sge; 1748 int indx; 1749 1750 for (indx = 0; indx < nsg; indx++) { 1751 sge = bnxt_re_get_hwqe(que, (*idx)++); 1752 sge->pa = htole64(sgl[indx].addr); 1753 sge->lkey = htole32(sgl[indx].lkey); 1754 sge->length = htole32(sgl[indx].length); 1755 } 1756 } 1757 1758 static int bnxt_re_put_tx_sge(struct bnxt_re_queue *que, uint32_t *idx, 1759 struct ibv_sge *sgl, int nsg) 1760 { 1761 struct bnxt_re_sge *sge; 1762 int indx; 1763 int len; 1764 1765 len = 0; 1766 for (indx = 0; indx < nsg; indx++) { 1767 sge = bnxt_re_get_hwqe(que, (*idx)++); 1768 sge->pa = htole64(sgl[indx].addr); 1769 sge->lkey = htole32(sgl[indx].lkey); 1770 sge->length = htole32(sgl[indx].length); 1771 len += sgl[indx].length; 1772 } 1773 return len; 1774 } 1775 1776 static inline int bnxt_re_calc_inline_len(struct ibv_send_wr *swr) 1777 { 1778 int illen, indx; 1779 1780 illen = 0; 1781 for (indx = 0; indx < swr->num_sge; indx++) 1782 illen += swr->sg_list[indx].length; 1783 return get_aligned(illen, sizeof(struct bnxt_re_sge)); 1784 } 1785 1786 static int bnxt_re_put_inline(struct bnxt_re_queue *que, uint32_t *idx, 1787 struct bnxt_re_push_buffer *pbuf, 1788 struct ibv_sge *sgl, uint32_t nsg, 1789 uint16_t max_ils) 1790 { 1791 int len, t_len, offt = 0; 1792 int t_cplen = 0, cplen; 1793 bool pull_dst = true; 1794 void *il_dst = NULL; 1795 void *il_src = NULL; 1796 int alsize; 1797 int indx; 1798 1799 alsize = sizeof(struct bnxt_re_sge); 1800 1801 t_len = 0; 1802 for (indx = 0; indx < nsg; indx++) { 1803 len = sgl[indx].length; 1804 il_src = (void *)sgl[indx].addr; 1805 t_len += len; 1806 if (t_len > max_ils) 1807 goto bad; 1808 while (len) { 1809 if (pull_dst) { 1810 pull_dst = false; 1811 il_dst = bnxt_re_get_hwqe(que, (*idx)++); 1812 if (pbuf) 1813 pbuf->wqe[*idx - 1] = 1814 (__u64)il_dst; 1815 t_cplen = 0; 1816 offt = 0; 1817 } 1818 cplen = MIN(len, alsize); 1819 cplen = MIN(cplen,(alsize - offt)); 1820 memcpy(il_dst, il_src, cplen); 1821 t_cplen += cplen; 1822 il_src += cplen; 1823 il_dst += cplen; 1824 offt += cplen; 1825 len -= cplen; 1826 if (t_cplen == alsize) 1827 pull_dst = true; 1828 } 1829 } 1830 1831 return t_len; 1832 bad: 1833 return -ENOMEM; 1834 } 1835 1836 static int bnxt_re_required_slots(struct bnxt_re_qp *qp, struct ibv_send_wr *wr, 1837 uint32_t *wqe_sz, void **pbuf) 1838 { 1839 uint32_t wqe_byte; 1840 int ilsize; 1841 1842 if (wr->send_flags & IBV_SEND_INLINE) { 1843 ilsize = bnxt_re_calc_inline_len(wr); 1844 if (ilsize > qp->cap.max_inline) 1845 return -EINVAL; 1846 if (qp->push_st_en && ilsize <= qp->max_push_sz) 1847 *pbuf = bnxt_re_get_pbuf(&qp->push_st_en, qp->ppp_idx, qp->cntx); 1848 wqe_byte = (ilsize + bnxt_re_get_sqe_hdr_sz()); 1849 } else { 1850 wqe_byte = bnxt_re_calc_wqe_sz(wr->num_sge); 1851 } 1852 1853 /* que->stride is always 2^4 = 16, thus using hard-coding */ 1854 *wqe_sz = wqe_byte >> 4; 1855 if (qp->qpmode == BNXT_RE_WQE_MODE_STATIC) 1856 return 8; 1857 return *wqe_sz; 1858 } 1859 1860 static inline void bnxt_re_set_hdr_flags(struct bnxt_re_bsqe *hdr, 1861 struct ibv_send_wr *wr, 1862 uint32_t slots, uint8_t sqsig) 1863 { 1864 uint32_t send_flags; 1865 uint32_t hdrval = 0; 1866 uint8_t opcd; 1867 1868 send_flags = wr->send_flags; 1869 if (send_flags & IBV_SEND_SIGNALED || sqsig) 1870 hdrval |= ((BNXT_RE_WR_FLAGS_SIGNALED & BNXT_RE_HDR_FLAGS_MASK) 1871 << BNXT_RE_HDR_FLAGS_SHIFT); 1872 if (send_flags & IBV_SEND_FENCE) 1873 hdrval |= ((BNXT_RE_WR_FLAGS_UC_FENCE & BNXT_RE_HDR_FLAGS_MASK) 1874 << BNXT_RE_HDR_FLAGS_SHIFT); 1875 if (send_flags & IBV_SEND_SOLICITED) 1876 hdrval |= ((BNXT_RE_WR_FLAGS_SE & BNXT_RE_HDR_FLAGS_MASK) 1877 << BNXT_RE_HDR_FLAGS_SHIFT); 1878 if (send_flags & IBV_SEND_INLINE) 1879 hdrval |= ((BNXT_RE_WR_FLAGS_INLINE & BNXT_RE_HDR_FLAGS_MASK) 1880 << BNXT_RE_HDR_FLAGS_SHIFT); 1881 hdrval |= (slots & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT; 1882 1883 /* Fill opcode */ 1884 opcd = ibv_to_bnxt_re_wr_opcd[wr->opcode]; 1885 hdrval |= (opcd & BNXT_RE_HDR_WT_MASK); 1886 hdr->rsv_ws_fl_wt = htole32(hdrval); 1887 } 1888 1889 static int bnxt_re_build_tx_sge(struct bnxt_re_queue *que, uint32_t *idx, 1890 struct bnxt_re_push_buffer *pbuf, 1891 struct ibv_send_wr *wr, 1892 uint16_t max_il) 1893 { 1894 if (wr->send_flags & IBV_SEND_INLINE) 1895 return bnxt_re_put_inline(que, idx, pbuf, wr->sg_list, wr->num_sge, max_il); 1896 1897 return bnxt_re_put_tx_sge(que, idx, wr->sg_list, wr->num_sge); 1898 } 1899 1900 static void *bnxt_re_pull_psn_buff(struct bnxt_re_queue *que, bool hw_retx) 1901 { 1902 if (hw_retx) 1903 return (void *)(que->pad + ((que->msn) << que->pad_stride_log2)); 1904 return (void *)(que->pad + ((*que->dbtail) << que->pad_stride_log2)); 1905 } 1906 1907 static void bnxt_re_fill_psns_for_msntbl(struct bnxt_re_qp *qp, uint32_t len, 1908 uint32_t st_idx, uint8_t opcode) 1909 { 1910 uint32_t npsn = 0, start_psn = 0, next_psn = 0; 1911 struct bnxt_re_msns *msns; 1912 uint32_t pkt_cnt = 0; 1913 1914 msns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, true); 1915 msns->start_idx_next_psn_start_psn = 0; 1916 1917 if (qp->qptyp == IBV_QPT_RC) { 1918 start_psn = qp->sq_psn; 1919 pkt_cnt = (len / qp->mtu); 1920 if (len % qp->mtu) 1921 pkt_cnt++; 1922 /* Increment the psn even for 0 len packets 1923 * e.g. for opcode rdma-write-with-imm-data 1924 * with length field = 0 1925 */ 1926 if (bnxt_re_is_zero_len_pkt(len, opcode)) 1927 pkt_cnt = 1; 1928 /* make it 24 bit */ 1929 next_psn = qp->sq_psn + pkt_cnt; 1930 npsn = next_psn; 1931 qp->sq_psn = next_psn; 1932 msns->start_idx_next_psn_start_psn |= 1933 bnxt_re_update_msn_tbl(st_idx, npsn, start_psn); 1934 qp->jsqq->hwque->msn++; 1935 qp->jsqq->hwque->msn %= qp->jsqq->hwque->msn_tbl_sz; 1936 } 1937 } 1938 1939 static void bnxt_re_fill_psns(struct bnxt_re_qp *qp, uint32_t len, 1940 uint32_t st_idx, uint8_t opcode) 1941 { 1942 uint32_t opc_spsn = 0, flg_npsn = 0; 1943 struct bnxt_re_psns_ext *psns_ext; 1944 uint32_t pkt_cnt = 0, nxt_psn = 0; 1945 struct bnxt_re_psns *psns; 1946 1947 psns = bnxt_re_pull_psn_buff(qp->jsqq->hwque, false); 1948 psns_ext = (struct bnxt_re_psns_ext *)psns; 1949 1950 if (qp->qptyp == IBV_QPT_RC) { 1951 opc_spsn = qp->sq_psn & BNXT_RE_PSNS_SPSN_MASK; 1952 pkt_cnt = (len / qp->mtu); 1953 if (len % qp->mtu) 1954 pkt_cnt++; 1955 /* Increment the psn even for 0 len packets 1956 * e.g. for opcode rdma-write-with-imm-data 1957 * with length field = 0 1958 */ 1959 if (bnxt_re_is_zero_len_pkt(len, opcode)) 1960 pkt_cnt = 1; 1961 nxt_psn = ((qp->sq_psn + pkt_cnt) & BNXT_RE_PSNS_NPSN_MASK); 1962 flg_npsn = nxt_psn; 1963 qp->sq_psn = nxt_psn; 1964 } 1965 psns->opc_spsn = htole32(opc_spsn); 1966 psns->flg_npsn = htole32(flg_npsn); 1967 /* Update for Thor p5 not Thor2 */ 1968 if (!BNXT_RE_HW_RETX(qp->cntx) && qp->cctx->chip_is_gen_p5_thor2) 1969 psns_ext->st_slot_idx = st_idx; 1970 } 1971 1972 static int bnxt_re_build_ud_sqe(struct ibv_send_wr *wr, 1973 struct bnxt_re_bsqe *hdr, 1974 struct bnxt_re_send *sqe) 1975 { 1976 struct bnxt_re_ah *ah; 1977 uint64_t qkey; 1978 1979 ah = to_bnxt_re_ah(wr->wr.ud.ah); 1980 if (!wr->wr.ud.ah) 1981 return -EINVAL; 1982 qkey = wr->wr.ud.remote_qkey; 1983 hdr->lhdr.qkey_len |= htole64(qkey << 32); 1984 sqe->dst_qp = htole32(wr->wr.ud.remote_qpn); 1985 sqe->avid = htole32(ah->avid & 0xFFFFF); 1986 1987 return 0; 1988 } 1989 1990 static void bnxt_re_build_cns_sqe(struct ibv_send_wr *wr, 1991 struct bnxt_re_bsqe *hdr, 1992 void *hdr2) 1993 { 1994 struct bnxt_re_atomic *sqe = hdr2; 1995 1996 hdr->key_immd = htole32(wr->wr.atomic.rkey); 1997 hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr); 1998 sqe->cmp_dt = htole64(wr->wr.atomic.compare_add); 1999 sqe->swp_dt = htole64(wr->wr.atomic.swap); 2000 } 2001 2002 static void bnxt_re_build_fna_sqe(struct ibv_send_wr *wr, 2003 struct bnxt_re_bsqe *hdr, 2004 void *hdr2) 2005 { 2006 struct bnxt_re_atomic *sqe = hdr2; 2007 2008 hdr->key_immd = htole32(wr->wr.atomic.rkey); 2009 hdr->lhdr.rva = htole64(wr->wr.atomic.remote_addr); 2010 sqe->swp_dt = htole64(wr->wr.atomic.compare_add); 2011 } 2012 2013 void bnxt_re_force_rts2rts(struct bnxt_re_qp *qp) 2014 { 2015 struct ibv_qp_attr attr = {}; 2016 int attr_mask; 2017 attr_mask = IBV_QP_STATE; 2018 attr.qp_state = IBV_QPS_RTS; 2019 bnxt_re_modify_qp(&qp->ibvqp, &attr, attr_mask); 2020 qp->wqe_cnt = 0; 2021 } 2022 2023 int bnxt_re_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr, 2024 struct ibv_send_wr **bad) 2025 { 2026 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); 2027 struct bnxt_re_queue *sq = qp->jsqq->hwque; 2028 struct bnxt_re_push_buffer *pbuf = NULL; 2029 bool chip_is_not_gen_p5_thor2; 2030 int slots, ret = 0, len = 0; 2031 uint32_t swq_idx, wqe_size; 2032 struct bnxt_re_wrid *wrid; 2033 struct bnxt_re_rdma *rsqe; 2034 struct bnxt_re_bsqe *hdr; 2035 struct bnxt_re_send *sqe; 2036 bool ring_db = false; 2037 uint32_t idx; 2038 2039 bnxt_re_dp_spin_lock(&sq->qlock); 2040 chip_is_not_gen_p5_thor2 = !qp->cctx->chip_is_gen_p5_thor2; 2041 while (wr) { 2042 slots = bnxt_re_required_slots(qp, wr, &wqe_size, (void **)&pbuf); 2043 if (unlikely(slots < 0 || bnxt_re_is_que_full(sq, slots)) || 2044 wr->num_sge > qp->cap.max_ssge) { 2045 *bad = wr; 2046 ret = ENOMEM; 2047 goto bad_wr; 2048 } 2049 if ((wr->opcode == IBV_WR_ATOMIC_CMP_AND_SWP || 2050 wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD) && 2051 !qp->cap.is_atomic_cap) { 2052 *bad = wr; 2053 ret = EINVAL; 2054 goto bad_wr; 2055 } 2056 idx = 0; 2057 len = 0; 2058 hdr = bnxt_re_get_hwqe(sq, idx++); 2059 sqe = bnxt_re_get_hwqe(sq, idx++); 2060 /* populate push buffer */ 2061 if (pbuf) { 2062 pbuf->qpid = qp->qpid; 2063 pbuf->wqe[0] = (__u64)hdr; 2064 pbuf->wqe[1] = (__u64)sqe; 2065 pbuf->st_idx = *sq->dbtail; 2066 } 2067 if (wr->num_sge) { 2068 len = bnxt_re_build_tx_sge(sq, &idx, pbuf, wr, qp->cap.max_inline); 2069 if (unlikely(len < 0)) { 2070 ret = ENOMEM; 2071 *bad = wr; 2072 goto bad_wr; 2073 } 2074 } 2075 hdr->lhdr.qkey_len = htole32(len); 2076 bnxt_re_set_hdr_flags(hdr, wr, wqe_size, qp->cap.sqsig); 2077 switch (wr->opcode) { 2078 case IBV_WR_SEND_WITH_IMM: 2079 /* HW is swapping the immediate data before 2080 * sending it out on the wire. To workaround 2081 * this, swap the imm_data value as sent by 2082 * the application so that the value going out 2083 * on the wire is in big-endian format. 2084 */ 2085 hdr->key_immd = htole32(be32toh(wr->imm_data)); 2086 if (qp->qptyp == IBV_QPT_UD) { 2087 if (chip_is_not_gen_p5_thor2 && 2088 qp->wqe_cnt == BNXT_RE_UD_QP_STALL) 2089 bnxt_re_force_rts2rts(qp); 2090 2091 len = bnxt_re_build_ud_sqe(wr, hdr, sqe); 2092 } 2093 break; 2094 case IBV_WR_SEND: 2095 if (qp->qptyp == IBV_QPT_UD) { 2096 if (chip_is_not_gen_p5_thor2 && 2097 qp->wqe_cnt == BNXT_RE_UD_QP_STALL) 2098 bnxt_re_force_rts2rts(qp); 2099 2100 len = bnxt_re_build_ud_sqe(wr, hdr, sqe); 2101 } 2102 break; 2103 case IBV_WR_RDMA_WRITE_WITH_IMM: 2104 hdr->key_immd = htole32(be32toh(wr->imm_data)); 2105 case IBV_WR_RDMA_WRITE: 2106 case IBV_WR_RDMA_READ: 2107 rsqe = (struct bnxt_re_rdma *)sqe; 2108 rsqe->rva = htole64(wr->wr.rdma.remote_addr); 2109 rsqe->rkey = htole32(wr->wr.rdma.rkey); 2110 break; 2111 case IBV_WR_ATOMIC_CMP_AND_SWP: 2112 bnxt_re_build_cns_sqe(wr, hdr, sqe); 2113 break; 2114 case IBV_WR_ATOMIC_FETCH_AND_ADD: 2115 bnxt_re_build_fna_sqe(wr, hdr, sqe); 2116 break; 2117 default : 2118 len = -EINVAL; 2119 break; 2120 } 2121 2122 if (unlikely(len < 0)) { 2123 ret = (len == -EINVAL) ? EINVAL : ENOMEM; 2124 *bad = wr; 2125 break; 2126 } 2127 if (BNXT_RE_HW_RETX(qp->cntx)) 2128 bnxt_re_fill_psns_for_msntbl(qp, len, *sq->dbtail, wr->opcode); 2129 else 2130 bnxt_re_fill_psns(qp, len, *sq->dbtail, wr->opcode); 2131 2132 wrid = bnxt_re_get_swqe(qp->jsqq, &swq_idx); 2133 wrid->wrid = wr->wr_id; 2134 wrid->bytes = len; 2135 wrid->slots = slots; 2136 wrid->sig = (wr->send_flags & IBV_SEND_SIGNALED || qp->cap.sqsig) ? 2137 IBV_SEND_SIGNALED : 0; 2138 wrid->wc_opcd = ibv_wr_to_wc_opcd[wr->opcode]; 2139 2140 bnxt_re_incr_tail(sq, slots); 2141 bnxt_re_jqq_mod_start(qp->jsqq, swq_idx); 2142 ring_db = true; 2143 if (pbuf) { 2144 ring_db = false; 2145 pbuf->tail = *sq->dbtail; 2146 if (_is_chip_thor2(qp->cctx)) { 2147 /* WA for SR2 A0, ring additional db */ 2148 ring_db |= _is_chip_a0(qp->cctx); 2149 bnxt_re_fill_ppp(pbuf, qp, len, idx); 2150 } else { 2151 bnxt_re_fill_push_wcb(qp, pbuf, idx); 2152 } 2153 2154 bnxt_re_put_pbuf(qp->cntx, pbuf); 2155 pbuf = NULL; 2156 } 2157 qp->wqe_cnt++; 2158 qp->sq_msn++; 2159 wr = wr->next; 2160 } 2161 2162 bad_wr: 2163 if (ring_db) 2164 bnxt_re_ring_sq_db(qp); 2165 2166 if (pbuf) 2167 bnxt_re_put_pbuf(qp->cntx, pbuf); 2168 2169 bnxt_re_dp_spin_unlock(&sq->qlock); 2170 return ret; 2171 } 2172 2173 int bnxt_re_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr, 2174 struct ibv_recv_wr **bad) 2175 { 2176 struct bnxt_re_qp *qp = to_bnxt_re_qp(ibvqp); 2177 struct bnxt_re_queue *rq = qp->jrqq->hwque; 2178 struct bnxt_re_wrid *swque; 2179 struct bnxt_re_brqe *hdr; 2180 struct bnxt_re_sge *sge; 2181 bool ring_db = false; 2182 uint32_t swq_idx; 2183 uint32_t hdrval; 2184 uint32_t idx; 2185 int rc = 0; 2186 2187 bnxt_re_dp_spin_lock(&rq->qlock); 2188 while (wr) { 2189 if (unlikely(bnxt_re_is_que_full(rq, rq->max_slots) || 2190 wr->num_sge > qp->cap.max_rsge)) { 2191 *bad = wr; 2192 rc = ENOMEM; 2193 break; 2194 } 2195 swque = bnxt_re_get_swqe(qp->jrqq, &swq_idx); 2196 2197 /* 2198 * Initialize idx to 2 since the length of header wqe is 32 bytes 2199 * i.e. sizeof(struct bnxt_re_brqe) + sizeof(struct bnxt_re_send) 2200 */ 2201 idx = 2; 2202 hdr = bnxt_re_get_hwqe_hdr(rq); 2203 2204 if (!wr->num_sge) { 2205 /* 2206 * HW needs at least one SGE for RQ Entries. 2207 * Create an entry if num_sge = 0, 2208 * update the idx and set length of sge to 0. 2209 */ 2210 sge = bnxt_re_get_hwqe(rq, idx++); 2211 sge->length = 0; 2212 } else { 2213 /* Fill SGEs */ 2214 bnxt_re_put_rx_sge(rq, &idx, wr->sg_list, wr->num_sge); 2215 } 2216 hdrval = BNXT_RE_WR_OPCD_RECV; 2217 hdrval |= ((idx & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT); 2218 hdr->rsv_ws_fl_wt = htole32(hdrval); 2219 hdr->wrid = htole32(swq_idx); 2220 2221 swque->wrid = wr->wr_id; 2222 swque->slots = rq->max_slots; 2223 swque->wc_opcd = BNXT_RE_WC_OPCD_RECV; 2224 2225 bnxt_re_jqq_mod_start(qp->jrqq, swq_idx); 2226 bnxt_re_incr_tail(rq, rq->max_slots); 2227 ring_db = true; 2228 wr = wr->next; 2229 } 2230 if (ring_db) 2231 bnxt_re_ring_rq_db(qp); 2232 bnxt_re_dp_spin_unlock(&rq->qlock); 2233 2234 return rc; 2235 } 2236 2237 static size_t bnxt_re_get_srqmem_size(struct bnxt_re_context *cntx, 2238 struct ibv_srq_init_attr *attr, 2239 struct bnxt_re_qattr *qattr) 2240 { 2241 uint32_t stride, nswr; 2242 size_t size = 0; 2243 2244 size = sizeof(struct bnxt_re_srq); 2245 size += sizeof(struct bnxt_re_queue); 2246 /* allocate 1 extra to determin full condition */ 2247 nswr = attr->attr.max_wr + 1; 2248 nswr = bnxt_re_init_depth(nswr, cntx->comp_mask); 2249 stride = bnxt_re_get_srqe_sz(); 2250 2251 qattr->nwr = nswr; 2252 qattr->slots = nswr; 2253 qattr->esize = stride; 2254 2255 qattr->sz_ring = get_aligned((nswr * stride), cntx->rdev->pg_size); 2256 qattr->sz_shad = nswr * sizeof(struct bnxt_re_wrid); /* shadow */ 2257 2258 size += qattr->sz_ring; 2259 size += qattr->sz_shad; 2260 return size; 2261 } 2262 2263 static void *bnxt_re_alloc_srqslab(struct bnxt_re_context *cntx, 2264 struct ibv_srq_init_attr *attr, 2265 struct bnxt_re_qattr *qattr) 2266 { 2267 size_t bytes; 2268 2269 bytes = bnxt_re_get_srqmem_size(cntx, attr, qattr); 2270 return bnxt_re_alloc_mem(bytes, cntx->rdev->pg_size); 2271 } 2272 2273 static struct bnxt_re_srq *bnxt_re_srq_alloc_queue_ptr(struct bnxt_re_mem *mem) 2274 { 2275 struct bnxt_re_srq *srq; 2276 2277 srq = bnxt_re_get_obj(mem, sizeof(*srq)); 2278 if (!srq) 2279 return NULL; 2280 srq->srqq = bnxt_re_get_obj(mem, sizeof(struct bnxt_re_queue)); 2281 if (!srq->srqq) 2282 return NULL; 2283 return srq; 2284 } 2285 2286 static int bnxt_re_srq_alloc_queue(struct bnxt_re_srq *srq, 2287 struct ibv_srq_init_attr *attr, 2288 struct bnxt_re_qattr *qattr) 2289 { 2290 struct bnxt_re_queue *que; 2291 int ret = -ENOMEM; 2292 int idx; 2293 2294 que = srq->srqq; 2295 que->depth = qattr->slots; 2296 que->stride = qattr->esize; 2297 que->va = bnxt_re_get_ring(srq->mem, qattr->sz_ring); 2298 if (!que->va) 2299 goto bail; 2300 bnxt_re_dp_spin_init(&que->qlock, PTHREAD_PROCESS_PRIVATE, !bnxt_single_threaded); 2301 /* For SRQ only bnxt_re_wrid.wrid is used. */ 2302 srq->srwrid = bnxt_re_get_obj(srq->mem, qattr->sz_shad); 2303 if (!srq->srwrid) 2304 goto bail; 2305 2306 srq->start_idx = 0; 2307 srq->last_idx = que->depth - 1; 2308 for (idx = 0; idx < que->depth; idx++) 2309 srq->srwrid[idx].next_idx = idx + 1; 2310 srq->srwrid[srq->last_idx].next_idx = -1; 2311 return 0; 2312 bail: 2313 bnxt_re_dp_spin_destroy(&srq->srqq->qlock); 2314 return ret; 2315 } 2316 2317 struct ibv_srq *bnxt_re_create_srq(struct ibv_pd *ibvpd, 2318 struct ibv_srq_init_attr *attr) 2319 { 2320 struct bnxt_re_srq_resp resp = {}; 2321 struct bnxt_re_srq_req cmd = {}; 2322 struct bnxt_re_qattr qattr = {}; 2323 struct bnxt_re_context *uctx; 2324 struct bnxt_re_srq *srq; 2325 void *mem; 2326 int ret; 2327 2328 uctx = to_bnxt_re_context(ibvpd->context); 2329 mem = bnxt_re_alloc_srqslab(uctx, attr, &qattr); 2330 if (!mem) 2331 return NULL; 2332 2333 srq = bnxt_re_srq_alloc_queue_ptr(mem); 2334 if (!srq) 2335 goto fail; 2336 srq->uctx = uctx; 2337 srq->mem = mem; 2338 if (bnxt_re_srq_alloc_queue(srq, attr, &qattr)) 2339 goto fail; 2340 2341 cmd.srqva = (uint64_t)srq->srqq->va; 2342 cmd.srq_handle = (uint64_t)srq; 2343 ret = ibv_cmd_create_srq(ibvpd, &srq->ibvsrq, attr, 2344 &cmd.cmd, sizeof(cmd), 2345 &resp.resp, sizeof(resp)); 2346 if (ret) 2347 goto fail; 2348 2349 srq->srqid = resp.srqid; 2350 srq->udpi = &uctx->udpi; 2351 srq->cap.max_wr = srq->srqq->depth; 2352 srq->cap.max_sge = attr->attr.max_sge; 2353 srq->cap.srq_limit = attr->attr.srq_limit; 2354 srq->arm_req = false; 2355 srq->rand.seed = srq->srqid; 2356 srq->shadow_db_key = BNXT_RE_DB_KEY_INVALID; 2357 2358 INIT_DBLY_LIST_NODE(&srq->dbnode); 2359 if (_is_db_drop_recovery_enable(uctx)) { 2360 pthread_spin_lock(&uctx->srq_dbr_res.lock); 2361 bnxt_re_list_add_node(&srq->dbnode, &uctx->srq_dbr_res.head); 2362 pthread_spin_unlock(&uctx->srq_dbr_res.lock); 2363 } 2364 return &srq->ibvsrq; 2365 fail: 2366 bnxt_re_free_mem(mem); 2367 return NULL; 2368 } 2369 2370 int bnxt_re_modify_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr, 2371 int attr_mask) 2372 { 2373 struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq); 2374 struct ibv_modify_srq cmd = {}; 2375 int status = 0; 2376 2377 status = ibv_cmd_modify_srq(ibvsrq, attr, attr_mask, 2378 &cmd, sizeof(cmd)); 2379 if (!status && ((attr_mask & IBV_SRQ_LIMIT) && 2380 (srq->cap.srq_limit != attr->srq_limit))) { 2381 srq->cap.srq_limit = attr->srq_limit; 2382 } 2383 srq->arm_req = true; 2384 return status; 2385 } 2386 2387 int bnxt_re_destroy_srq(struct ibv_srq *ibvsrq) 2388 { 2389 struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq); 2390 struct bnxt_re_mem *mem; 2391 int ret; 2392 2393 if (_is_db_drop_recovery_enable(srq->uctx)) { 2394 pthread_spin_lock(&srq->uctx->srq_dbr_res.lock); 2395 bnxt_re_list_del_node(&srq->dbnode, &srq->uctx->srq_dbr_res.head); 2396 pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock); 2397 } 2398 ret = ibv_cmd_destroy_srq(ibvsrq); 2399 if (ret) { 2400 if (_is_db_drop_recovery_enable(srq->uctx)) { 2401 pthread_spin_lock(&srq->uctx->srq_dbr_res.lock); 2402 bnxt_re_list_add_node(&srq->dbnode, 2403 &srq->uctx->srq_dbr_res.head); 2404 pthread_spin_unlock(&srq->uctx->srq_dbr_res.lock); 2405 } 2406 return ret; 2407 } 2408 bnxt_re_dp_spin_destroy(&srq->srqq->qlock); 2409 mem = srq->mem; 2410 bnxt_re_free_mem(mem); 2411 return 0; 2412 } 2413 2414 int bnxt_re_query_srq(struct ibv_srq *ibvsrq, struct ibv_srq_attr *attr) 2415 { 2416 struct ibv_query_srq cmd = {}; 2417 2418 return ibv_cmd_query_srq(ibvsrq, attr, &cmd, sizeof cmd); 2419 } 2420 2421 static int bnxt_re_build_srqe(struct bnxt_re_srq *srq, 2422 struct ibv_recv_wr *wr, void *srqe) 2423 { 2424 struct bnxt_re_brqe *hdr = srqe; 2425 struct bnxt_re_wrid *wrid; 2426 struct bnxt_re_sge *sge; 2427 int wqe_sz, len, next; 2428 uint32_t hdrval = 0; 2429 int indx; 2430 2431 sge = (srqe + bnxt_re_get_srqe_hdr_sz()); 2432 next = srq->start_idx; 2433 wrid = &srq->srwrid[next]; 2434 2435 len = 0; 2436 for (indx = 0; indx < wr->num_sge; indx++, sge++) { 2437 sge->pa = htole64(wr->sg_list[indx].addr); 2438 sge->lkey = htole32(wr->sg_list[indx].lkey); 2439 sge->length = htole32(wr->sg_list[indx].length); 2440 len += wr->sg_list[indx].length; 2441 } 2442 2443 hdrval = BNXT_RE_WR_OPCD_RECV; 2444 wqe_sz = wr->num_sge + (bnxt_re_get_srqe_hdr_sz() >> 4); /* 16B align */ 2445 /* HW needs at least one SGE for SRQ Entries. 2446 * Increment SRQ WQE size if num_sge = 0 to 2447 * include the extra SGE. Set the sge length to 2448 * zero. 2449 */ 2450 if (!wr->num_sge) { 2451 wqe_sz++; 2452 sge->length = 0; 2453 } 2454 hdrval |= ((wqe_sz & BNXT_RE_HDR_WS_MASK) << BNXT_RE_HDR_WS_SHIFT); 2455 hdr->rsv_ws_fl_wt = htole32(hdrval); 2456 hdr->wrid = htole32((uint32_t)next); 2457 2458 /* Fill wrid */ 2459 wrid->wrid = wr->wr_id; 2460 wrid->bytes = len; /* N.A. for RQE */ 2461 wrid->sig = 0; /* N.A. for RQE */ 2462 2463 return len; 2464 } 2465 2466 int bnxt_re_post_srq_recv(struct ibv_srq *ibvsrq, struct ibv_recv_wr *wr, 2467 struct ibv_recv_wr **bad) 2468 { 2469 struct bnxt_re_srq *srq = to_bnxt_re_srq(ibvsrq); 2470 struct bnxt_re_queue *rq = srq->srqq; 2471 int ret, count = 0; 2472 void *srqe; 2473 2474 bnxt_re_dp_spin_lock(&rq->qlock); 2475 count = rq->tail > rq->head ? rq->tail - rq->head : 2476 rq->depth - rq->head + rq->tail; 2477 while (wr) { 2478 if (srq->start_idx == srq->last_idx || 2479 wr->num_sge > srq->cap.max_sge) { 2480 *bad = wr; 2481 bnxt_re_dp_spin_unlock(&rq->qlock); 2482 return ENOMEM; 2483 } 2484 2485 srqe = (void *) (rq->va + (rq->tail * rq->stride)); 2486 memset(srqe, 0, bnxt_re_get_srqe_sz()); 2487 ret = bnxt_re_build_srqe(srq, wr, srqe); 2488 if (ret < 0) { 2489 bnxt_re_dp_spin_unlock(&rq->qlock); 2490 *bad = wr; 2491 return ENOMEM; 2492 } 2493 2494 srq->start_idx = srq->srwrid[srq->start_idx].next_idx; 2495 bnxt_re_incr_tail(rq, 1); 2496 wr = wr->next; 2497 bnxt_re_ring_srq_db(srq); 2498 count++; 2499 if (srq->arm_req == true && count > srq->cap.srq_limit) { 2500 srq->arm_req = false; 2501 bnxt_re_ring_srq_arm(srq); 2502 } 2503 } 2504 bnxt_re_dp_spin_unlock(&rq->qlock); 2505 2506 return 0; 2507 } 2508 2509 struct ibv_ah *bnxt_re_create_ah(struct ibv_pd *ibvpd, struct ibv_ah_attr *attr) 2510 { 2511 struct bnxt_re_context *uctx; 2512 struct bnxt_re_pd *pd; 2513 struct bnxt_re_ah *ah; 2514 int status; 2515 struct ibv_create_ah_resp resp = {}; 2516 2517 pd = to_bnxt_re_pd(ibvpd); 2518 uctx = to_bnxt_re_context(ibvpd->context); 2519 2520 ah = calloc(1, sizeof(struct bnxt_re_ah)); 2521 if (!ah) { 2522 goto failed; 2523 } 2524 2525 ah->pd = pd; 2526 pthread_mutex_lock(&uctx->shlock); 2527 status = ibv_cmd_create_ah(ibvpd, &ah->ibvah, attr, 2528 &resp, sizeof(resp)); 2529 2530 if (status) 2531 { 2532 pthread_mutex_unlock(&uctx->shlock); 2533 free(ah); 2534 goto failed; 2535 } 2536 /* read AV ID now. */ 2537 ah->avid = *(uint32_t *)(uctx->shpg + BNXT_RE_SHPG_AVID_OFFT); 2538 pthread_mutex_unlock(&uctx->shlock); 2539 2540 return &ah->ibvah; 2541 failed: 2542 return NULL; 2543 } 2544 2545 int bnxt_re_destroy_ah(struct ibv_ah *ibvah) 2546 { 2547 struct bnxt_re_ah *ah; 2548 int status; 2549 2550 ah = to_bnxt_re_ah(ibvah); 2551 status = ibv_cmd_destroy_ah(ibvah); 2552 if (status) 2553 return status; 2554 free(ah); 2555 2556 return 0; 2557 } 2558