/*-
 * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
 *
 * Copyright (C) 2019 - 2021 Intel Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenFabrics.org BSD license below:
 *
 *   Redistribution and use in source and binary forms, with or
 *   without modification, are permitted provided that the following
 *   conditions are met:
 *
 *    - Redistributions of source code must retain the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer.
 *
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/*$FreeBSD$*/

#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <errno.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <netinet/in.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdbool.h>

#include "irdma_umain.h"
#include "abi.h"

static inline void
print_fw_ver(uint64_t fw_ver, char *str, size_t len)
{
	uint16_t major, minor;

	major = fw_ver >> 32 & 0xffff;
	minor = fw_ver & 0xffff;

	snprintf(str, len, "%d.%d", major, minor);
}

/**
 * irdma_uquery_device_ex - query device attributes including extended properties
 * @context: user context for the device
 * @input: extensible input struct for ibv_query_device_ex verb
 * @attr: extended device attribute struct
 * @attr_size: size of extended device attribute struct
 **/
int
irdma_uquery_device_ex(struct ibv_context *context,
		       const struct ibv_query_device_ex_input *input,
		       struct ibv_device_attr_ex *attr, size_t attr_size)
{
	struct irdma_query_device_ex cmd = {};
	struct irdma_query_device_ex_resp resp = {};
	uint64_t fw_ver;
	int ret;

	ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver,
				      &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
				      &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp));
	if (ret)
		return ret;

	print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver));

	return 0;
}

/**
 * irdma_uquery_device - call driver to query device for max resources
 * @context: user context for the device
 * @attr: where to save all the max resources from the driver
 **/
int
irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t fw_ver;
	int ret;

	ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd));
	if (ret)
		return ret;

	print_fw_ver(fw_ver, attr->fw_ver, sizeof(attr->fw_ver));
	return 0;
}

/**
 * irdma_uquery_port - get port attributes (msg size, lnk, mtu...)
 * @context: user context of the device
 * @port: port for the attributes
 * @attr: to return port attributes
 **/
int
irdma_uquery_port(struct ibv_context *context, uint8_t port,
		  struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;

	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
}

/**
 * irdma_ualloc_pd - allocates protection domain and return pd ptr
 * @context: user context of the device
 **/
struct ibv_pd *
irdma_ualloc_pd(struct ibv_context *context)
{
	struct ibv_alloc_pd cmd;
	struct irdma_ualloc_pd_resp resp = {};
	struct irdma_upd *iwupd;
	int err;

	iwupd = malloc(sizeof(*iwupd));
	if (!iwupd)
		return NULL;

	err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd),
			       &resp.ibv_resp, sizeof(resp));
	if (err)
		goto err_free;

	iwupd->pd_id = resp.pd_id;

	return &iwupd->ibv_pd;

err_free:
	free(iwupd);
	errno = err;
	return NULL;
}

/**
 * irdma_ufree_pd - free pd resources
 * @pd: pd to free resources
 */
int
irdma_ufree_pd(struct ibv_pd *pd)
{
	struct irdma_upd *iwupd;
	int ret;

	iwupd = container_of(pd, struct irdma_upd, ibv_pd);
	ret = ibv_cmd_dealloc_pd(pd);
	if (ret)
		return ret;

	free(iwupd);

	return 0;
}

/**
 * irdma_ureg_mr - register user memory region
 * @pd: pd for the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
struct ibv_mr *
irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
	      int access)
{
	struct irdma_umr *umr;
	struct irdma_ureg_mr cmd;
	struct ibv_reg_mr_resp resp;
	int err;

	umr = malloc(sizeof(*umr));
	if (!umr)
		return NULL;

	cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
	err = ibv_cmd_reg_mr(pd, addr, length,
			     (uintptr_t)addr, access, &umr->vmr.ibv_mr, &cmd.ibv_cmd,
			     sizeof(cmd), &resp, sizeof(resp));
	if (err) {
		free(umr);
		errno = err;
		return NULL;
	}
	umr->acc_flags = access;

	return &umr->vmr.ibv_mr;
}

/**
 * irdma_udereg_mr - deregister memory region
 * @mr: mr that was allocated
 */
int
irdma_udereg_mr(struct ibv_mr *mr)
{
	struct irdma_umr *umr;
	struct verbs_mr *vmr;
	int ret;

	vmr = container_of(mr, struct verbs_mr, ibv_mr);
	umr = container_of(vmr, struct irdma_umr, vmr);

	ret = ibv_cmd_dereg_mr(mr);
	if (ret)
		return ret;

	free(umr);

	return 0;
}

/**
 * irdma_ualloc_mw - allocate memory window
 * @pd: protection domain
 * @type: memory window type
 */
struct ibv_mw *
irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
{
	struct ibv_mw *mw;
	struct ibv_alloc_mw cmd;
	struct ibv_alloc_mw_resp resp;

	mw = calloc(1, sizeof(*mw));
	if (!mw)
		return NULL;

	if (ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp,
			     sizeof(resp))) {
		printf("%s: Failed to alloc memory window\n",
		       __func__);
		free(mw);
		return NULL;
	}

	return mw;
}
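
/*
 * Illustrative usage (not part of the provider): the routines above are
 * reached through the generic libibverbs entry points. A minimal sketch of
 * how an application would allocate a PD and register a buffer against this
 * provider might look like the following; error handling is abbreviated and
 * the buffer size is arbitrary.
 *
 *	struct ibv_pd *pd = ibv_alloc_pd(ctx);          // dispatches to irdma_ualloc_pd()
 *	void *buf = calloc(1, 4096);
 *	struct ibv_mr *mr = ibv_reg_mr(pd, buf, 4096,
 *				       IBV_ACCESS_LOCAL_WRITE |
 *				       IBV_ACCESS_REMOTE_WRITE); // irdma_ureg_mr()
 *	...
 *	ibv_dereg_mr(mr);                               // irdma_udereg_mr()
 *	ibv_dealloc_pd(pd);                             // irdma_ufree_pd()
 */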
/**
 * irdma_ubind_mw - bind a memory window
 * @qp: qp to post WR
 * @mw: memory window to bind
 * @mw_bind: bind info
 */
int
irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
	       struct ibv_mw_bind *mw_bind)
{
	struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
	struct verbs_mr *vmr;
	struct irdma_umr *umr;

	struct ibv_send_wr wr = {};
	struct ibv_send_wr *bad_wr;
	int err;

	if (!bind_info->mr && (bind_info->addr || bind_info->length))
		return EINVAL;

	if (bind_info->mr) {
		vmr = verbs_get_mr(bind_info->mr);
		umr = container_of(vmr, struct irdma_umr, vmr);
		if (vmr->mr_type != IBV_MR_TYPE_MR)
			return ENOTSUP;

		if (umr->acc_flags & IBV_ACCESS_ZERO_BASED)
			return EINVAL;

		if (mw->pd != bind_info->mr->pd)
			return EPERM;
	}

	wr.opcode = IBV_WR_BIND_MW;
	wr.bind_mw.bind_info = mw_bind->bind_info;
	wr.bind_mw.mw = mw;
	wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);

	wr.wr_id = mw_bind->wr_id;
	wr.send_flags = mw_bind->send_flags;

	err = irdma_upost_send(qp, &wr, &bad_wr);
	if (!err)
		mw->rkey = wr.bind_mw.rkey;

	return err;
}

/**
 * irdma_udealloc_mw - deallocate memory window
 * @mw: memory window to dealloc
 */
int
irdma_udealloc_mw(struct ibv_mw *mw)
{
	int ret;
	struct ibv_dealloc_mw cmd;

	ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
	if (ret)
		return ret;
	free(mw);

	return 0;
}

static void *
irdma_alloc_hw_buf(size_t size)
{
	void *buf;

	buf = memalign(IRDMA_HW_PAGE_SIZE, size);

	if (!buf)
		return NULL;
	if (ibv_dontfork_range(buf, size)) {
		free(buf);
		return NULL;
	}

	return buf;
}

static void
irdma_free_hw_buf(void *buf, size_t size)
{
	ibv_dofork_range(buf, size);
	free(buf);
}

/**
 * get_cq_size - returns actual cqe needed by HW
 * @ncqe: minimum cqes requested by application
 * @hw_rev: HW generation
 */
static inline int
get_cq_size(int ncqe, u8 hw_rev)
{
	ncqe++;

	/* Completions with immediate require 1 extra entry */
	if (hw_rev > IRDMA_GEN_1)
		ncqe *= 2;

	if (ncqe < IRDMA_U_MINCQ_SIZE)
		ncqe = IRDMA_U_MINCQ_SIZE;

	return ncqe;
}

static inline size_t get_cq_total_bytes(u32 cq_size) {
	return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE);
}

/**
 * ucreate_cq - irdma util function to create a CQ
 * @context: ibv context
 * @attr_ex: CQ init attributes
 * @ext_cq: flag to create an extendable or normal CQ
 */
static struct ibv_cq_ex *
ucreate_cq(struct ibv_context *context,
	   struct ibv_cq_init_attr_ex *attr_ex,
	   bool ext_cq)
{
	struct irdma_cq_uk_init_info info = {};
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct irdma_ucreate_cq_ex cmd = {};
	struct irdma_ucreate_cq_ex_resp resp = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	struct irdma_ureg_mr reg_mr_shadow_cmd = {};
	struct ibv_reg_mr_resp reg_mr_shadow_resp = {};
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uvcontext *iwvctx;
	struct irdma_ucq *iwucq;
	size_t total_size;
	u32 cq_pages;
	int ret, ncqe;
	u8 hw_rev;

	iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;
	hw_rev = uk_attrs->hw_rev;

	if (ext_cq && hw_rev == IRDMA_GEN_1) {
		errno = EOPNOTSUPP;
		return NULL;
	}

	if (attr_ex->cqe < IRDMA_MIN_CQ_SIZE || attr_ex->cqe > uk_attrs->max_hw_cq_size) {
		errno = EINVAL;
		return NULL;
	}

	/* save the cqe requested by application */
	ncqe = attr_ex->cqe;

	iwucq = calloc(1, sizeof(*iwucq));
	if (!iwucq)
		return NULL;

	if (pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE)) {
		free(iwucq);
		return NULL;
	}

	info.cq_size = get_cq_size(attr_ex->cqe, hw_rev);
	iwucq->comp_vector = attr_ex->comp_vector;
	LIST_INIT(&iwucq->resize_list);
	LIST_INIT(&iwucq->cmpl_generated);
	total_size = get_cq_total_bytes(info.cq_size);
	cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT;

	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
		total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE;

	iwucq->buf_size = total_size;
	info.cq_base = irdma_alloc_hw_buf(total_size);
	if (!info.cq_base)
		goto err_cq_base;

	memset(info.cq_base, 0, total_size);
	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
	reg_mr_cmd.cq_pages = cq_pages;

	ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base,
			     total_size, (uintptr_t)info.cq_base,
			     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr,
			     &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
			     &reg_mr_resp, sizeof(reg_mr_resp));
	if (ret) {
		errno = ret;
		goto err_dereg_mr;
	}

	iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

	if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
		info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE);
		if (!info.shadow_area)
			goto err_dereg_mr;

		memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE);
		reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
		reg_mr_shadow_cmd.cq_pages = 1;

		ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area,
				     IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
				     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr,
				     &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
				     &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
		if (ret) {
			errno = ret;
			goto err_dereg_shadow;
		}

		iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

	} else {
		info.shadow_area = (__le64 *) ((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT));
	}

	attr_ex->cqe = info.cq_size;
	cmd.user_cq_buf = (__u64) ((uintptr_t)info.cq_base);
	cmd.user_shadow_area = (__u64) ((uintptr_t)info.shadow_area);

	ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex,
				   &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp,
				   sizeof(resp.ibv_resp), sizeof(resp));
	if (ret) {
		errno = ret;
		goto err_dereg_shadow;
	}

	if (ext_cq)
		irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex);
	info.cq_id = resp.cq_id;
	/* Do not report the cqe's burned by HW */
	iwucq->verbs_cq.cq.cqe = ncqe;

	info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET);
	irdma_uk_cq_init(&iwucq->cq, &info);

	return &iwucq->verbs_cq.cq_ex;

err_dereg_shadow:
	ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
	if (iwucq->vmr_shadow_area.ibv_mr.handle) {
		ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
		irdma_free_hw_buf(info.shadow_area, IRDMA_HW_PAGE_SIZE);
	}
err_dereg_mr:
	irdma_free_hw_buf(info.cq_base, total_size);
err_cq_base:
	printf("%s: failed to initialize CQ\n", __func__);
	pthread_spin_destroy(&iwucq->lock);

	free(iwucq);

	return NULL;
}
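
/*
 * Illustrative usage (not part of the provider): both CQ flavors funnel into
 * ucreate_cq() above. A rough sketch, assuming a device context "ctx" and
 * ignoring error handling, of how an application would request an extended
 * CQ with per-completion byte length and timestamps:
 *
 *	struct ibv_cq_init_attr_ex cq_attr = {
 *		.cqe = 256,
 *		.comp_vector = 0,
 *		.wc_flags = IBV_WC_EX_WITH_BYTE_LEN |
 *			    IBV_WC_EX_WITH_COMPLETION_TIMESTAMP,
 *	};
 *	struct ibv_cq_ex *cq_ex = ibv_create_cq_ex(ctx, &cq_attr);
 *	// or, for a plain CQ: ibv_create_cq(ctx, 256, NULL, NULL, 0)
 *
 * Note that ucreate_cq() rounds the requested depth up (get_cq_size()) but
 * reports the application-requested value back in verbs_cq.cq.cqe.
 */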
struct ibv_cq *
irdma_ucreate_cq(struct ibv_context *context, int cqe,
		 struct ibv_comp_channel *channel,
		 int comp_vector)
{
	struct ibv_cq_init_attr_ex attr_ex = {
		.cqe = cqe,
		.channel = channel,
		.comp_vector = comp_vector,
	};
	struct ibv_cq_ex *ibvcq_ex;

	ibvcq_ex = ucreate_cq(context, &attr_ex, false);

	return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL;
}

struct ibv_cq_ex *
irdma_ucreate_cq_ex(struct ibv_context *context,
		    struct ibv_cq_init_attr_ex *attr_ex)
{
	if (attr_ex->wc_flags & ~IRDMA_CQ_SUPPORTED_WC_FLAGS) {
		errno = EOPNOTSUPP;
		return NULL;
	}

	return ucreate_cq(context, attr_ex, true);
}

/**
 * irdma_free_cq_buf - free memory for cq buffer
 * @cq_buf: cq buf to free
 */
static void
irdma_free_cq_buf(struct irdma_cq_buf *cq_buf)
{
	ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr);
	irdma_free_hw_buf(cq_buf->cq.cq_base, get_cq_total_bytes(cq_buf->cq.cq_size));
	free(cq_buf);
}

/**
 * irdma_process_resize_list - process the cq list to remove buffers
 * @iwucq: cq which owns the list
 * @lcqe_buf: cq buf where the last cqe is found
 */
static int
irdma_process_resize_list(struct irdma_ucq *iwucq,
			  struct irdma_cq_buf *lcqe_buf)
{
	struct irdma_cq_buf *cq_buf, *next;
	int cq_cnt = 0;

	LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) {
		if (cq_buf == lcqe_buf)
			return cq_cnt;

		LIST_REMOVE(cq_buf, list);
		irdma_free_cq_buf(cq_buf);
		cq_cnt++;
	}

	return cq_cnt;
}

static void
irdma_remove_cmpls_list(struct irdma_ucq *iwucq)
{
	struct irdma_cmpl_gen *cmpl_node, *next;

	LIST_FOREACH_SAFE(cmpl_node, &iwucq->cmpl_generated, list, next) {
		LIST_REMOVE(cmpl_node, list);
		free(cmpl_node);
	}
}

static int
irdma_generated_cmpls(struct irdma_ucq *iwucq, struct irdma_cq_poll_info *cq_poll_info)
{
	struct irdma_cmpl_gen *cmpl;

	if (!iwucq || LIST_EMPTY(&iwucq->cmpl_generated))
		return ENOENT;
	cmpl = LIST_FIRST(&iwucq->cmpl_generated);
	LIST_REMOVE(cmpl, list);
	memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));

	free(cmpl);

	return 0;
}

/**
 * irdma_set_cpi_common_values - fill in values for polling info struct
 * @cpi: resulting structure of cq_poll_info type
 * @qp: QPair
 * @qp_num: id of the QP
 */
static void
irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
			    struct irdma_qp_uk *qp, __u32 qp_num)
{
	cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
	cpi->error = 1;
	cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
	cpi->minor_err = FLUSH_GENERAL_ERR;
	cpi->qp_handle = (irdma_qp_handle) (uintptr_t)qp;
	cpi->qp_id = qp_num;
}

static bool
irdma_cq_empty(struct irdma_ucq *iwucq)
{
	struct irdma_cq_uk *ukcq;
	__u64 qword3;
	__le64 *cqe;
	__u8 polarity;

	ukcq = &iwucq->cq;
	cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
	get_64bit_val(cqe, 24, &qword3);
	polarity = (__u8) RS_64(qword3, IRDMA_CQ_VALID);

	return polarity != ukcq->polarity;
}

/**
 * irdma_generate_flush_completions - generate completion from WRs
 * @iwuqp: pointer to QP
 */
static void
irdma_generate_flush_completions(struct irdma_uqp *iwuqp)
{
	struct irdma_qp_uk *qp = &iwuqp->qp;
	struct irdma_ring *sq_ring = &qp->sq_ring;
	struct irdma_ring *rq_ring = &qp->rq_ring;
	struct irdma_cmpl_gen *cmpl;
	__le64 *sw_wqe;
	__u64 wqe_qword;
	__u32 wqe_idx;

	if (pthread_spin_lock(&iwuqp->send_cq->lock))
		return;
	if (irdma_cq_empty(iwuqp->send_cq)) {
		while (IRDMA_RING_MORE_WORK(*sq_ring)) {
			cmpl = malloc(sizeof(*cmpl));
			if (!cmpl) {
				pthread_spin_unlock(&iwuqp->send_cq->lock);
				return;
			}

			wqe_idx = sq_ring->tail;
			irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
			cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
			sw_wqe = qp->sq_base[wqe_idx].elem;
			get_64bit_val(sw_wqe, 24, &wqe_qword);
			cmpl->cpi.op_type = (__u8) RS_64(wqe_qword, IRDMAQPSQ_OPCODE);
			/* remove the SQ WR by moving SQ tail */
			IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
			LIST_INSERT_HEAD(&iwuqp->send_cq->cmpl_generated, cmpl, list);
		}
	}
	pthread_spin_unlock(&iwuqp->send_cq->lock);
	if (pthread_spin_lock(&iwuqp->recv_cq->lock))
		return;
	if (irdma_cq_empty(iwuqp->recv_cq)) {
		while (IRDMA_RING_MORE_WORK(*rq_ring)) {
			cmpl = malloc(sizeof(*cmpl));
			if (!cmpl) {
				pthread_spin_unlock(&iwuqp->recv_cq->lock);
				return;
			}

			wqe_idx = rq_ring->tail;
			irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
			cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
			cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
			/* remove the RQ WR by moving RQ tail */
			IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
			LIST_INSERT_HEAD(&iwuqp->recv_cq->cmpl_generated, cmpl, list);
		}
	}
	pthread_spin_unlock(&iwuqp->recv_cq->lock);
}

void *
irdma_flush_thread(void *arg)
{
	__u8 i = 5;
	struct irdma_uqp *iwuqp = arg;

	while (--i) {
		if (pthread_spin_lock(&iwuqp->lock))
			break;
		irdma_generate_flush_completions(arg);
		pthread_spin_unlock(&iwuqp->lock);
		sleep(1);
	}
	pthread_exit(NULL);
}

/**
 * irdma_udestroy_cq - destroys cq
 * @cq: ptr to cq to be destroyed
 */
int
irdma_udestroy_cq(struct ibv_cq *cq)
{
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uvcontext *iwvctx;
	struct irdma_ucq *iwucq;
	int ret;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;

	ret = pthread_spin_destroy(&iwucq->lock);
	if (ret)
		goto err;

	if (!LIST_EMPTY(&iwucq->cmpl_generated))
		irdma_remove_cmpls_list(iwucq);
	irdma_process_resize_list(iwucq, NULL);
	ret = ibv_cmd_destroy_cq(cq);
	if (ret)
		goto err;

	ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
	irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size);

	if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
		ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
		irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
	}
	free(iwucq);
	return 0;

err:
	return ret;
}

static enum ibv_wc_status
irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode)
{
	switch (opcode) {
	case FLUSH_PROT_ERR:
		return IBV_WC_LOC_PROT_ERR;
	case FLUSH_REM_ACCESS_ERR:
		return IBV_WC_REM_ACCESS_ERR;
	case FLUSH_LOC_QP_OP_ERR:
		return IBV_WC_LOC_QP_OP_ERR;
	case FLUSH_REM_OP_ERR:
		return IBV_WC_REM_OP_ERR;
	case FLUSH_LOC_LEN_ERR:
		return IBV_WC_LOC_LEN_ERR;
	case FLUSH_GENERAL_ERR:
		return IBV_WC_WR_FLUSH_ERR;
	case FLUSH_MW_BIND_ERR:
		return IBV_WC_MW_BIND_ERR;
	case FLUSH_REM_INV_REQ_ERR:
		return IBV_WC_REM_INV_REQ_ERR;
	case FLUSH_RETRY_EXC_ERR:
		return IBV_WC_RETRY_EXC_ERR;
	case FLUSH_FATAL_ERR:
	default:
		return IBV_WC_FATAL_ERR;
	}
}
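
/*
 * Illustrative flow (not part of the provider): once irdma_umodify_qp() moves
 * a QP to IBV_QPS_ERR it starts irdma_flush_thread(), which synthesizes
 * IRDMA_COMPL_STATUS_FLUSHED entries for any work requests still on the SQ/RQ
 * rings. A sketch of how an application would observe them, assuming "qp" and
 * its CQ "cq" already exist and error handling is omitted:
 *
 *	struct ibv_qp_attr attr = { .qp_state = IBV_QPS_ERR };
 *	struct ibv_wc wc;
 *
 *	ibv_modify_qp(qp, &attr, IBV_QP_STATE);
 *	while (ibv_poll_cq(cq, 1, &wc) > 0) {
 *		// outstanding WRs complete with wc.status == IBV_WC_WR_FLUSH_ERR,
 *		// mapped by irdma_flush_err_to_ib_wc_status() above
 *	}
 */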
/**
 * irdma_process_cqe_ext - process current cqe for extended CQ
 * @cur_cqe - current cqe info
 */
static void
irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe)
{
	struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe);
	struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;

	ibvcq_ex->wr_id = cur_cqe->wr_id;
	if (cur_cqe->error)
		ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
				   irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR;
	else
		ibvcq_ex->status = IBV_WC_SUCCESS;
}

/**
 * irdma_process_cqe - process current cqe info
 * @entry - ibv_wc object to fill in for non-extended CQ
 * @cur_cqe - current cqe info
 */
static void
irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe)
{
	struct irdma_qp_uk *qp;
	struct ibv_qp *ib_qp;

	entry->wc_flags = 0;
	entry->wr_id = cur_cqe->wr_id;
	entry->qp_num = cur_cqe->qp_id;
	qp = cur_cqe->qp_handle;
	ib_qp = qp->back_qp;

	if (cur_cqe->error) {
		entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
				irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR;
		entry->vendor_err = cur_cqe->major_err << 16 |
				    cur_cqe->minor_err;
	} else {
		entry->status = IBV_WC_SUCCESS;
	}

	if (cur_cqe->imm_valid) {
		entry->imm_data = htonl(cur_cqe->imm_data);
		entry->wc_flags |= IBV_WC_WITH_IMM;
	}

	switch (cur_cqe->op_type) {
	case IRDMA_OP_TYPE_RDMA_WRITE:
	case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
		entry->opcode = IBV_WC_RDMA_WRITE;
		break;
	case IRDMA_OP_TYPE_RDMA_READ:
		entry->opcode = IBV_WC_RDMA_READ;
		break;
	case IRDMA_OP_TYPE_SEND_SOL:
	case IRDMA_OP_TYPE_SEND_SOL_INV:
	case IRDMA_OP_TYPE_SEND_INV:
	case IRDMA_OP_TYPE_SEND:
		entry->opcode = IBV_WC_SEND;
		break;
	case IRDMA_OP_TYPE_BIND_MW:
		entry->opcode = IBV_WC_BIND_MW;
		break;
	case IRDMA_OP_TYPE_REC:
		entry->opcode = IBV_WC_RECV;
		if (ib_qp->qp_type != IBV_QPT_UD &&
		    cur_cqe->stag_invalid_set) {
			entry->invalidated_rkey = cur_cqe->inv_stag;
			entry->wc_flags |= IBV_WC_WITH_INV;
		}
		break;
	case IRDMA_OP_TYPE_REC_IMM:
		entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
		if (ib_qp->qp_type != IBV_QPT_UD &&
		    cur_cqe->stag_invalid_set) {
			entry->invalidated_rkey = cur_cqe->inv_stag;
			entry->wc_flags |= IBV_WC_WITH_INV;
		}
		break;
	case IRDMA_OP_TYPE_INV_STAG:
		entry->opcode = IBV_WC_LOCAL_INV;
		break;
	default:
		entry->status = IBV_WC_GENERAL_ERR;
		printf("%s: Invalid opcode = %d in CQE\n",
		       __func__, cur_cqe->op_type);
		return;
	}

	if (ib_qp->qp_type == IBV_QPT_UD) {
		entry->src_qp = cur_cqe->ud_src_qpn;
		entry->wc_flags |= IBV_WC_GRH;
	} else {
		entry->src_qp = cur_cqe->qp_id;
	}
	entry->byte_len = cur_cqe->bytes_xfered;
}

/**
 * irdma_poll_one - poll one entry of the CQ
 * @ukcq: ukcq to poll
 * @cur_cqe: current CQE info to be filled in
 * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ
 *
 * Returns the internal irdma device error code or 0 on success
 */
static int
irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe,
	       struct ibv_wc *entry)
{
	int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe);

	if (ret)
		return ret;
	if (!entry)
		irdma_process_cqe_ext(cur_cqe);
	else
		irdma_process_cqe(entry, cur_cqe);

	return 0;
}

/**
 * __irdma_upoll_cq - irdma util function to poll device CQ
 * @iwucq: irdma cq to poll
 * @num_entries: max cq entries to poll
 * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ
 *
 * Returns non-negative value equal to the number of completions
 * found. On failure, EINVAL
 */
static int
__irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries,
		 struct ibv_wc *entry)
{
	struct irdma_cq_buf *cq_buf, *next;
	struct irdma_cq_buf *last_buf = NULL;
	struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe;
	bool cq_new_cqe = false;
	int resized_bufs = 0;
	int npolled = 0;
	int ret;

	/* go through the list of previously resized CQ buffers */
	LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) {
		while (npolled < num_entries) {
			ret = irdma_poll_one(&cq_buf->cq, cur_cqe,
					     entry ? entry + npolled : NULL);
			if (!ret) {
				++npolled;
				cq_new_cqe = true;
				continue;
			}
			if (ret == ENOENT)
				break;
			/* QP using the CQ is destroyed. Skip reporting this CQE */
			if (ret == EFAULT) {
				cq_new_cqe = true;
				continue;
			}
			goto error;
		}

		/* save the resized CQ buffer which received the last cqe */
		if (cq_new_cqe)
			last_buf = cq_buf;
		cq_new_cqe = false;
	}

	/* check the current CQ for new cqes */
	while (npolled < num_entries) {
		ret = irdma_poll_one(&iwucq->cq, cur_cqe,
				     entry ? entry + npolled : NULL);
		if (ret == ENOENT) {
			ret = irdma_generated_cmpls(iwucq, cur_cqe);
			if (!ret) {
				if (entry)
					irdma_process_cqe(entry + npolled, cur_cqe);
				else
					irdma_process_cqe_ext(cur_cqe);
			}
		}
		if (!ret) {
			++npolled;
			cq_new_cqe = true;
			continue;
		}
		if (ret == ENOENT)
			break;
		/* QP using the CQ is destroyed. Skip reporting this CQE */
		if (ret == EFAULT) {
			cq_new_cqe = true;
			continue;
		}
		goto error;
	}

	if (cq_new_cqe)
		/* all previous CQ resizes are complete */
		resized_bufs = irdma_process_resize_list(iwucq, NULL);
	else if (last_buf)
		/* only CQ resizes up to the last_buf are complete */
		resized_bufs = irdma_process_resize_list(iwucq, last_buf);
	if (resized_bufs)
		/* report to the HW the number of complete CQ resizes */
		irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs);

	return npolled;

error:
	printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret);

	return EINVAL;
}

/**
 * irdma_upoll_cq - verb API callback to poll device CQ
 * @cq: ibv_cq to poll
 * @num_entries: max cq entries to poll
 * @entry: pointer to array of ibv_wc objects to be filled in for each completion
 *
 * Returns non-negative value equal to the number of completions
 * found and a negative error code on failure
 */
int
irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry)
{
	struct irdma_ucq *iwucq;
	int ret;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		return -ret;

	ret = __irdma_upoll_cq(iwucq, num_entries, entry);

	pthread_spin_unlock(&iwucq->lock);

	return ret;
}
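
/*
 * Illustrative usage (not part of the provider): irdma_upoll_cq() is reached
 * through ibv_poll_cq(). A minimal polling loop an application might run,
 * assuming a CQ "cq" and omitting error handling:
 *
 *	struct ibv_wc wc[16];
 *	int n, i;
 *
 *	do {
 *		n = ibv_poll_cq(cq, 16, wc);
 *		for (i = 0; i < n; i++) {
 *			if (wc[i].status != IBV_WC_SUCCESS)
 *				fprintf(stderr, "wr_id %llu failed: %s\n",
 *					(unsigned long long)wc[i].wr_id,
 *					ibv_wc_status_str(wc[i].status));
 *		}
 *	} while (n > 0);
 */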
/**
 * irdma_start_poll - verb_ex API callback to poll batch of WC's
 * @ibvcq_ex: ibv extended CQ
 * @attr: attributes (not used)
 *
 * Start polling batch of work completions. Return 0 on success, ENOENT when
 * no completions are available on CQ. And an error code on errors
 */
static int
irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr)
{
	struct irdma_ucq *iwucq;
	int ret;

	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		return ret;

	ret = __irdma_upoll_cq(iwucq, 1, NULL);
	if (ret == 1)
		return 0;

	/* No Completions on CQ */
	if (!ret)
		ret = ENOENT;

	pthread_spin_unlock(&iwucq->lock);

	return ret;
}

/**
 * irdma_next_poll - verb_ex API callback to get next WC
 * @ibvcq_ex: ibv extended CQ
 *
 * Return 0 on success, ENOENT when no completions are available on CQ.
 * And an error code on errors
 */
static int
irdma_next_poll(struct ibv_cq_ex *ibvcq_ex)
{
	struct irdma_ucq *iwucq;
	int ret;

	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
	ret = __irdma_upoll_cq(iwucq, 1, NULL);
	if (ret == 1)
		return 0;

	/* No Completions on CQ */
	if (!ret)
		ret = ENOENT;

	return ret;
}

/**
 * irdma_end_poll - verb_ex API callback to end polling of WC's
 * @ibvcq_ex: ibv extended CQ
 */
static void
irdma_end_poll(struct ibv_cq_ex *ibvcq_ex)
{
	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
					       verbs_cq.cq_ex);

	pthread_spin_unlock(&iwucq->lock);
}

/**
 * irdma_wc_read_completion_ts - Get completion timestamp
 * @ibvcq_ex: ibv extended CQ
 *
 * Get completion timestamp in HCA clock units
 */
static uint64_t irdma_wc_read_completion_ts(struct ibv_cq_ex *ibvcq_ex){
	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
					       verbs_cq.cq_ex);
#define HCA_CORE_CLOCK_800_MHZ 800

	return iwucq->cur_cqe.tcp_seq_num_rtt / HCA_CORE_CLOCK_800_MHZ;
}

static enum ibv_wc_opcode
irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex)
{
	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
					       verbs_cq.cq_ex);

	switch (iwucq->cur_cqe.op_type) {
	case IRDMA_OP_TYPE_RDMA_WRITE:
	case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
		return IBV_WC_RDMA_WRITE;
	case IRDMA_OP_TYPE_RDMA_READ:
		return IBV_WC_RDMA_READ;
	case IRDMA_OP_TYPE_SEND_SOL:
	case IRDMA_OP_TYPE_SEND_SOL_INV:
	case IRDMA_OP_TYPE_SEND_INV:
	case IRDMA_OP_TYPE_SEND:
		return IBV_WC_SEND;
	case IRDMA_OP_TYPE_BIND_MW:
		return IBV_WC_BIND_MW;
	case IRDMA_OP_TYPE_REC:
		return IBV_WC_RECV;
	case IRDMA_OP_TYPE_REC_IMM:
		return IBV_WC_RECV_RDMA_WITH_IMM;
	case IRDMA_OP_TYPE_INV_STAG:
		return IBV_WC_LOCAL_INV;
	}

	printf("%s: Invalid opcode = %d in CQE\n", __func__,
	       iwucq->cur_cqe.op_type);

	return 0;
}

static uint32_t irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex){
	struct irdma_cq_poll_info *cur_cqe;
	struct irdma_ucq *iwucq;

	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
	cur_cqe = &iwucq->cur_cqe;
	return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0;
}

static int
irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex)
{
	struct irdma_cq_poll_info *cur_cqe;
	struct irdma_ucq *iwucq;
	struct irdma_qp_uk *qp;
	struct ibv_qp *ib_qp;
	int wc_flags = 0;

	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
	cur_cqe = &iwucq->cur_cqe;
	qp = cur_cqe->qp_handle;
	ib_qp = qp->back_qp;

	if (cur_cqe->imm_valid)
		wc_flags |= IBV_WC_WITH_IMM;

	if (ib_qp->qp_type == IBV_QPT_UD) {
		wc_flags |= IBV_WC_GRH;
	} else {
		if (cur_cqe->stag_invalid_set) {
			switch (cur_cqe->op_type) {
			case IRDMA_OP_TYPE_REC:
				wc_flags |= IBV_WC_WITH_INV;
				break;
			case IRDMA_OP_TYPE_REC_IMM:
				wc_flags |= IBV_WC_WITH_INV;
				break;
			}
		}
	}

	return wc_flags;
}

static uint32_t irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex){
	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
					       verbs_cq.cq_ex);

	return iwucq->cur_cqe.bytes_xfered;
}

static __be32 irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex){
	struct irdma_cq_poll_info *cur_cqe;
	struct irdma_ucq *iwucq;

	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
	cur_cqe = &iwucq->cur_cqe;

	return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0;
}

static uint32_t irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex){
	struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
					       verbs_cq.cq_ex);

	return iwucq->cur_cqe.qp_id;
}

static uint32_t irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex){
	struct irdma_cq_poll_info *cur_cqe;
	struct irdma_ucq *iwucq;
	struct irdma_qp_uk *qp;
	struct ibv_qp *ib_qp;

	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
	cur_cqe = &iwucq->cur_cqe;
	qp = cur_cqe->qp_handle;
	ib_qp = qp->back_qp;
	return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn : cur_cqe->qp_id;
}

static uint8_t irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex){
	return 0;
}

void
irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq,
			       struct ibv_cq_init_attr_ex *attr_ex)
{
	struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;

	ibvcq_ex->start_poll = irdma_start_poll;
	ibvcq_ex->end_poll = irdma_end_poll;
	ibvcq_ex->next_poll = irdma_next_poll;

	if (attr_ex->wc_flags & IBV_WC_EX_WITH_COMPLETION_TIMESTAMP) {
		ibvcq_ex->read_completion_ts = irdma_wc_read_completion_ts;
		iwucq->report_rtt = true;
	}

	ibvcq_ex->read_opcode = irdma_wc_read_opcode;
	ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err;
	ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags;

	if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
		ibvcq_ex->read_byte_len = irdma_wc_read_byte_len;
	if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM)
		ibvcq_ex->read_imm_data = irdma_wc_read_imm_data;
	if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM)
		ibvcq_ex->read_qp_num = irdma_wc_read_qp_num;
	if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP)
		ibvcq_ex->read_src_qp = irdma_wc_read_src_qp;
	if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL)
		ibvcq_ex->read_sl = irdma_wc_read_sl;
}
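
/*
 * Illustrative usage (not part of the provider): the callbacks installed above
 * back the libibverbs extended-CQ polling API. A sketch of a consumer loop,
 * assuming an extended CQ "cq_ex" created with IBV_WC_EX_WITH_BYTE_LEN and
 * with error handling omitted:
 *
 *	struct ibv_poll_cq_attr poll_attr = {};
 *
 *	if (!ibv_start_poll(cq_ex, &poll_attr)) {	// irdma_start_poll()
 *		do {
 *			uint64_t wr_id = cq_ex->wr_id;
 *			enum ibv_wc_status status = cq_ex->status;
 *			uint32_t len = ibv_wc_read_byte_len(cq_ex); // irdma_wc_read_byte_len()
 *			// consume wr_id/status/len ...
 *		} while (!ibv_next_poll(cq_ex));	// irdma_next_poll()
 *		ibv_end_poll(cq_ex);			// irdma_end_poll(), drops the CQ lock
 *	}
 */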
/**
 * irdma_arm_cq - arm cq
 * @iwucq: cq to arm
 * @cq_notify: notification params
 */
static void
irdma_arm_cq(struct irdma_ucq *iwucq,
	     enum irdma_cmpl_notify cq_notify)
{
	iwucq->is_armed = true;
	iwucq->arm_sol = true;
	iwucq->skip_arm = false;
	iwucq->skip_sol = true;
	irdma_uk_cq_request_notification(&iwucq->cq, cq_notify);
}

/**
 * irdma_uarm_cq - callback to arm cq
 * @cq: cq to arm
 * @solicited: to get notify params
 */
int
irdma_uarm_cq(struct ibv_cq *cq, int solicited)
{
	struct irdma_ucq *iwucq;
	enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT;
	int ret;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	if (solicited)
		cq_notify = IRDMA_CQ_COMPL_SOLICITED;

	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		return ret;

	if (iwucq->is_armed) {
		if (iwucq->arm_sol && !solicited) {
			irdma_arm_cq(iwucq, cq_notify);
		} else {
			iwucq->skip_arm = true;
			iwucq->skip_sol = solicited ? true : false;
		}
	} else {
		irdma_arm_cq(iwucq, cq_notify);
	}

	pthread_spin_unlock(&iwucq->lock);

	return 0;
}

/**
 * irdma_cq_event - handle completion event on a cq
 * @cq: cq to arm
 */
void
irdma_cq_event(struct ibv_cq *cq)
{
	struct irdma_ucq *iwucq;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	if (pthread_spin_lock(&iwucq->lock))
		return;

	if (iwucq->skip_arm)
		irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT);
	else
		iwucq->is_armed = false;

	pthread_spin_unlock(&iwucq->lock);
}

void *
irdma_mmap(int fd, off_t offset)
{
	void *map;

	map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
		   fd, offset);
	if (map == MAP_FAILED)
		return map;

	if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) {
		munmap(map, IRDMA_HW_PAGE_SIZE);
		return MAP_FAILED;
	}

	return map;
}

void
irdma_munmap(void *map)
{
	ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE);
	munmap(map, IRDMA_HW_PAGE_SIZE);
}

/**
 * irdma_destroy_vmapped_qp - destroy resources for qp
 * @iwuqp: qp struct for resources
 */
static int
irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp)
{
	int ret;

	ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp);
	if (ret)
		return ret;

	if (iwuqp->qp.push_db)
		irdma_munmap(iwuqp->qp.push_db);
	if (iwuqp->qp.push_wqe)
		irdma_munmap(iwuqp->qp.push_wqe);

	ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);

	return 0;
}

/**
 * irdma_vmapped_qp - create resources for qp
 * @iwuqp: qp struct for resources
 * @pd: pd for the qp
 * @attr: attributes of qp passed
 * @sqdepth: depth of sq
 * @rqdepth: depth of rq
 * @info: info for initializing user level qp
 * @legacy_mode: legacy ABI mode flag
 */
static int
irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd,
		 struct ibv_qp_init_attr *attr, int sqdepth,
		 int rqdepth, struct irdma_qp_uk_init_info *info,
		 bool legacy_mode)
{
	struct irdma_ucreate_qp cmd = {};
	size_t sqsize, rqsize, totalqpsize;
	struct irdma_ucreate_qp_resp resp = {};
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	int ret;

	sqsize = roundup(sqdepth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
	rqsize = roundup(rqdepth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
	totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE;
	info->sq = irdma_alloc_hw_buf(totalqpsize);
	iwuqp->buf_size = totalqpsize;

	if (!info->sq)
		return ENOMEM;

	memset(info->sq, 0, totalqpsize);
	info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE];
	info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem;

	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP;
	reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT;
	reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT;

	ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize,
			     (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE,
			     &iwuqp->vmr.ibv_mr, &reg_mr_cmd.ibv_cmd,
			     sizeof(reg_mr_cmd), &reg_mr_resp,
			     sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	cmd.user_wqe_bufs = (__u64) ((uintptr_t)info->sq);
	cmd.user_compl_ctx = (__u64) (uintptr_t)&iwuqp->qp;
	ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd,
				sizeof(cmd), &resp.ibv_resp,
				sizeof(struct irdma_ucreate_qp_resp));
	if (ret)
		goto err_qp;

	info->sq_size = resp.actual_sq_size;
	info->rq_size = resp.actual_rq_size;
	info->first_sq_wq = legacy_mode ? 1 : resp.lsmm;
	info->qp_caps = resp.qp_caps;
	info->qp_id = resp.qp_id;
	iwuqp->irdma_drv_opt = resp.irdma_drv_opt;
	iwuqp->ibv_qp.qp_num = resp.qp_id;

	iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq,
				      verbs_cq.cq);
	iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq,
				      verbs_cq.cq);
	iwuqp->send_cq->uqp = iwuqp;
	iwuqp->recv_cq->uqp = iwuqp;

	return 0;
err_qp:
	ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);
err_dereg_mr:
	printf("%s: failed to create QP, status %d\n", __func__, ret);
	irdma_free_hw_buf(info->sq, iwuqp->buf_size);
	return ret;
}

/**
 * irdma_ucreate_qp - create qp on user app
 * @pd: pd for the qp
 * @attr: attributes of the qp to be created (sizes, sge, cq)
 */
struct ibv_qp *
irdma_ucreate_qp(struct ibv_pd *pd,
		 struct ibv_qp_init_attr *attr)
{
	struct irdma_qp_uk_init_info info = {};
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uvcontext *iwvctx;
	struct irdma_uqp *iwuqp;
	u32 sqdepth, rqdepth;
	u8 sqshift, rqshift;
	int status;

	if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) {
		printf("%s: failed to create QP, unsupported QP type: 0x%x\n",
		       __func__, attr->qp_type);
		errno = EOPNOTSUPP;
		return NULL;
	}

	iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;

	if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
	    attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
	    attr->cap.max_inline_data > uk_attrs->max_hw_inline) {
		errno = EINVAL;
		return NULL;
	}

	irdma_get_wqe_shift(uk_attrs,
			    uk_attrs->hw_rev > IRDMA_GEN_1 ? attr->cap.max_send_sge + 1 :
			    attr->cap.max_send_sge,
			    attr->cap.max_inline_data, &sqshift);
	status = irdma_get_sqdepth(uk_attrs->max_hw_wq_quanta,
				   attr->cap.max_send_wr, sqshift, &sqdepth);
	if (status) {
		printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n",
		       __func__, attr->cap.max_send_wr, attr->cap.max_send_sge,
		       attr->cap.max_inline_data);
		errno = status;
		return NULL;
	}

	if (uk_attrs->hw_rev == IRDMA_GEN_1 && iwvctx->abi_ver > 4)
		rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1;
	else
		irdma_get_wqe_shift(uk_attrs, attr->cap.max_recv_sge, 0,
				    &rqshift);

	status = irdma_get_rqdepth(uk_attrs->max_hw_rq_quanta,
				   attr->cap.max_recv_wr, rqshift, &rqdepth);
	if (status) {
		printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n",
		       __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge);
		errno = status;
		return NULL;
	}

	iwuqp = memalign(1024, sizeof(*iwuqp));
	if (!iwuqp)
		return NULL;

	memset(iwuqp, 0, sizeof(*iwuqp));

	if (pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE))
		goto err_free_qp;

	info.sq_size = sqdepth >> sqshift;
	info.rq_size = rqdepth >> rqshift;
	attr->cap.max_send_wr = info.sq_size;
	attr->cap.max_recv_wr = info.rq_size;

	info.uk_attrs = uk_attrs;
	info.max_sq_frag_cnt = attr->cap.max_send_sge;
	info.max_rq_frag_cnt = attr->cap.max_recv_sge;
	iwuqp->recv_sges = calloc(attr->cap.max_recv_sge, sizeof(*iwuqp->recv_sges));
	if (!iwuqp->recv_sges)
		goto err_destroy_lock;

	info.wqe_alloc_db = (u32 *)iwvctx->db;
	info.legacy_mode = iwvctx->legacy_mode;
	info.sq_wrtrk_array = calloc(sqdepth, sizeof(*info.sq_wrtrk_array));
	if (!info.sq_wrtrk_array)
		goto err_free_rsges;

	info.rq_wrid_array = calloc(rqdepth, sizeof(*info.rq_wrid_array));
	if (!info.rq_wrid_array)
		goto err_free_sq_wrtrk;

	iwuqp->sq_sig_all = attr->sq_sig_all;
	iwuqp->qp_type = attr->qp_type;
	status = irdma_vmapped_qp(iwuqp, pd, attr, sqdepth, rqdepth, &info, iwvctx->legacy_mode);
	if (status) {
		errno = status;
		goto err_free_rq_wrid;
	}

	iwuqp->qp.back_qp = iwuqp;
	iwuqp->qp.lock = &iwuqp->lock;

	info.max_sq_frag_cnt = attr->cap.max_send_sge;
	info.max_rq_frag_cnt = attr->cap.max_recv_sge;
	info.max_inline_data = attr->cap.max_inline_data;
	iwuqp->qp.force_fence = true;
	status = irdma_uk_qp_init(&iwuqp->qp, &info);
	if (status) {
		errno = status;
		goto err_free_vmap_qp;
	}

	attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift;
	attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift;
	return &iwuqp->ibv_qp;

err_free_vmap_qp:
	irdma_destroy_vmapped_qp(iwuqp);
	irdma_free_hw_buf(info.sq, iwuqp->buf_size);
err_free_rq_wrid:
	free(info.rq_wrid_array);
err_free_sq_wrtrk:
	free(info.sq_wrtrk_array);
err_free_rsges:
	free(iwuqp->recv_sges);
err_destroy_lock:
	pthread_spin_destroy(&iwuqp->lock);
err_free_qp:
	printf("%s: failed to create QP\n", __func__);
	free(iwuqp);

	return NULL;
}
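
/*
 * Illustrative usage (not part of the provider): irdma_ucreate_qp() is the
 * backend of ibv_create_qp(). A sketch of the attributes an application might
 * pass for an RC QP, assuming existing CQs "scq"/"rcq" and a PD "pd", with
 * error handling omitted; the capability values are arbitrary examples:
 *
 *	struct ibv_qp_init_attr qp_attr = {
 *		.qp_type = IBV_QPT_RC,
 *		.send_cq = scq,
 *		.recv_cq = rcq,
 *		.cap = {
 *			.max_send_wr = 64,
 *			.max_recv_wr = 64,
 *			.max_send_sge = 2,
 *			.max_recv_sge = 2,
 *			.max_inline_data = 64,
 *		},
 *	};
 *	struct ibv_qp *qp = ibv_create_qp(pd, &qp_attr);
 *
 * On return the cap fields are updated to the depths actually provisioned
 * (see the IRDMA_SQ_RSVD/IRDMA_RQ_RSVD adjustment above).
 */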
/**
 * irdma_uquery_qp - query qp for some attribute
 * @qp: qp for the attributes query
 * @attr: to return the attributes
 * @attr_mask: mask of what is query for
 * @init_attr: initial attributes during create_qp
 */
int
irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
		struct ibv_qp_init_attr *init_attr)
{
	struct ibv_query_qp cmd;

	return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd,
				sizeof(cmd));
}

/**
 * irdma_umodify_qp - send qp modify to driver
 * @qp: qp to modify
 * @attr: attribute to modify
 * @attr_mask: mask of the attribute
 */
int
irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
{
	struct irdma_umodify_qp_resp resp = {};
	struct ibv_modify_qp cmd = {};
	struct irdma_modify_qp_cmd cmd_ex = {};
	struct irdma_uvcontext *iwvctx;
	struct irdma_uqp *iwuqp;

	iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
	iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx);
	iwuqp->attr_mask = attr_mask;
	memcpy(&iwuqp->attr, attr, sizeof(iwuqp->attr));

	if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE &&
	    iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) {
		u64 offset;
		void *map;
		int ret;

		ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
					   sizeof(cmd_ex.ibv_cmd),
					   sizeof(cmd_ex), &resp.ibv_resp,
					   sizeof(resp.ibv_resp),
					   sizeof(resp));
		if (!ret)
			iwuqp->qp.rd_fence_rate = resp.rd_fence_rate;
		if (ret || !resp.push_valid)
			return ret;

		if (iwuqp->qp.push_wqe)
			return ret;

		offset = resp.push_wqe_mmap_key;
		map = irdma_mmap(qp->context->cmd_fd, offset);
		if (map == MAP_FAILED)
			return ret;

		iwuqp->qp.push_wqe = map;

		offset = resp.push_db_mmap_key;
		map = irdma_mmap(qp->context->cmd_fd, offset);
		if (map == MAP_FAILED) {
			irdma_munmap(iwuqp->qp.push_wqe);
			iwuqp->qp.push_wqe = NULL;
			printf("failed to map push page, errno %d\n", errno);
			return ret;
		}
		iwuqp->qp.push_wqe += resp.push_offset;
		iwuqp->qp.push_db = map + resp.push_offset;

		return ret;
	} else {
		int ret;

		ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
		if (ret)
			return ret;
		if (attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_ERR)
			pthread_create(&iwuqp->flush_thread, NULL, irdma_flush_thread, iwuqp);
		return 0;
	}
}

static void
irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush)
{
	struct irdma_umodify_qp_resp resp = {};
	struct irdma_modify_qp_cmd cmd_ex = {};
	struct irdma_uqp *iwuqp;

	cmd_ex.sq_flush = sq_flush;
	cmd_ex.rq_flush = rq_flush;
	iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);

	ibv_cmd_modify_qp_ex(qp, &iwuqp->attr, iwuqp->attr_mask,
			     &cmd_ex.ibv_cmd,
			     sizeof(cmd_ex.ibv_cmd),
			     sizeof(cmd_ex), &resp.ibv_resp,
			     sizeof(resp.ibv_resp),
			     sizeof(resp));
}

/**
 * irdma_clean_cqes - clean cq entries for qp
 * @qp: qp for which completions are cleaned
 * @iwucq: cq to be cleaned
 */
static void
irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq)
{
	struct irdma_cq_uk *ukcq = &iwucq->cq;
	int ret;

	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		return;

	irdma_uk_clean_cq(qp, ukcq);
	pthread_spin_unlock(&iwucq->lock);
}

/**
 * irdma_udestroy_qp - destroy qp
 * @qp: qp to destroy
 */
int
irdma_udestroy_qp(struct ibv_qp *qp)
{
	struct irdma_uqp *iwuqp;
	int ret;

	iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
	if (iwuqp->flush_thread) {
		pthread_cancel(iwuqp->flush_thread);
		pthread_join(iwuqp->flush_thread, NULL);
	}
	ret = pthread_spin_destroy(&iwuqp->lock);
	if (ret)
		goto err;

	ret = irdma_destroy_vmapped_qp(iwuqp);
	if (ret)
		goto err;

	/* Clean any pending completions from the cq(s) */
	if (iwuqp->send_cq)
		irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq);

	if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq)
		irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq);

	if (iwuqp->qp.sq_wrtrk_array)
		free(iwuqp->qp.sq_wrtrk_array);
	if (iwuqp->qp.rq_wrid_array)
		free(iwuqp->qp.rq_wrid_array);

	irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size);
	free(iwuqp->recv_sges);
	free(iwuqp);
	return 0;

err:
	printf("%s: failed to destroy QP, status %d\n",
	       __func__, ret);
	return ret;
}

/**
 * irdma_copy_sg_list - copy sg list for qp
 * @sg_list: copied into sg_list
 * @sgl: copy from sgl
 * @num_sges: count of sg entries
 */
static void
irdma_copy_sg_list(struct irdma_sge *sg_list, struct ibv_sge *sgl,
		   int num_sges)
{
	int i;

	for (i = 0; i < num_sges; i++) {
		sg_list[i].tag_off = sgl[i].addr;
		sg_list[i].len = sgl[i].length;
		sg_list[i].stag = sgl[i].lkey;
	}
}

/**
 * calc_type2_mw_stag - calculate type 2 MW stag
 * @rkey: desired rkey of the MW
 * @mw_rkey: type2 memory window rkey
 *
 * compute type2 memory window stag by taking the lower 8 bits
 * of the desired rkey and leaving the upper 24 bits of mw_rkey unchanged
 */
static inline u32 calc_type2_mw_stag(u32 rkey, u32 mw_rkey) {
	const u32 mask = 0xff;

	return (rkey & mask) | (mw_rkey & ~mask);
}

/**
 * irdma_upost_send - post send wr for user application
 * @ib_qp: qp to post wr
 * @ib_wr: work request ptr
 * @bad_wr: return of bad wr if err
 */
int
irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr,
		 struct ibv_send_wr **bad_wr)
{
	struct irdma_post_sq_info info;
	struct irdma_uvcontext *iwvctx;
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uqp *iwuqp;
	bool reflush = false;
	int err = 0;

	iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp);
	iwvctx = container_of(ib_qp->context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;

	err = pthread_spin_lock(&iwuqp->lock);
	if (err)
		return err;

	if (!IRDMA_RING_MORE_WORK(iwuqp->qp.sq_ring) &&
	    ib_qp->state == IBV_QPS_ERR)
		reflush = true;

	while (ib_wr) {
		memset(&info, 0, sizeof(info));
		info.wr_id = (u64)(ib_wr->wr_id);
		if ((ib_wr->send_flags & IBV_SEND_SIGNALED) ||
		    iwuqp->sq_sig_all)
			info.signaled = true;
		if (ib_wr->send_flags & IBV_SEND_FENCE)
			info.read_fence = true;
		if (iwuqp->send_cq->report_rtt)
			info.report_rtt = true;

		switch (ib_wr->opcode) {
		case IBV_WR_SEND_WITH_IMM:
			if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) {
				info.imm_data_valid = true;
				info.imm_data = ntohl(ib_wr->imm_data);
			} else {
				err = EINVAL;
				break;
			}
			/* fallthrough */
		case IBV_WR_SEND:
		case IBV_WR_SEND_WITH_INV:
			if (ib_wr->opcode == IBV_WR_SEND ||
			    ib_wr->opcode == IBV_WR_SEND_WITH_IMM) {
				if (ib_wr->send_flags & IBV_SEND_SOLICITED)
					info.op_type = IRDMA_OP_TYPE_SEND_SOL;
				else
					info.op_type = IRDMA_OP_TYPE_SEND;
			} else {
				if (ib_wr->send_flags & IBV_SEND_SOLICITED)
					info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV;
				else
					info.op_type = IRDMA_OP_TYPE_SEND_INV;
				info.stag_to_inv = ib_wr->imm_data;
			}
			if (ib_wr->send_flags & IBV_SEND_INLINE) {
				info.op.inline_send.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr;
				info.op.inline_send.len = ib_wr->sg_list[0].length;
				if (ib_qp->qp_type == IBV_QPT_UD) {
					struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah,
									    struct irdma_uah, ibv_ah);

					info.op.inline_send.ah_id = ah->ah_id;
					info.op.inline_send.qkey = ib_wr->wr.ud.remote_qkey;
					info.op.inline_send.dest_qp = ib_wr->wr.ud.remote_qpn;
				}
				err = irdma_uk_inline_send(&iwuqp->qp, &info, false);
			} else {
				info.op.send.num_sges = ib_wr->num_sge;
				info.op.send.sg_list = (struct irdma_sge *)ib_wr->sg_list;
				if (ib_qp->qp_type == IBV_QPT_UD) {
					struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah,
									    struct irdma_uah, ibv_ah);

					info.op.inline_send.ah_id = ah->ah_id;
					info.op.inline_send.qkey = ib_wr->wr.ud.remote_qkey;
					info.op.inline_send.dest_qp = ib_wr->wr.ud.remote_qpn;
				}
				err = irdma_uk_send(&iwuqp->qp, &info, false);
			}
			break;
		case IBV_WR_RDMA_WRITE_WITH_IMM:
			if (iwuqp->qp.qp_caps & IRDMA_WRITE_WITH_IMM) {
				info.imm_data_valid = true;
				info.imm_data = ntohl(ib_wr->imm_data);
			} else {
				err = EINVAL;
				break;
			}
			/* fallthrough */
		case IBV_WR_RDMA_WRITE:
			if (ib_wr->send_flags & IBV_SEND_SOLICITED)
				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL;
			else
				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;

			if (ib_wr->send_flags & IBV_SEND_INLINE) {
				info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr;
				info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
				info.op.inline_rdma_write.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr;
				info.op.inline_rdma_write.rem_addr.stag = ib_wr->wr.rdma.rkey;
				err = irdma_uk_inline_rdma_write(&iwuqp->qp, &info, false);
			} else {
				info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
				info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
				info.op.rdma_write.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr;
				info.op.rdma_write.rem_addr.stag = ib_wr->wr.rdma.rkey;
				err = irdma_uk_rdma_write(&iwuqp->qp, &info, false);
			}
			break;
		case IBV_WR_RDMA_READ:
			if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) {
				err = EINVAL;
				break;
			}
			info.op_type = IRDMA_OP_TYPE_RDMA_READ;
			info.op.rdma_read.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr;
			info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey;

			info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list;
			info.op.rdma_read.num_lo_sges = ib_wr->num_sge;
			err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false);
			break;
		case IBV_WR_BIND_MW:
			if (ib_qp->qp_type != IBV_QPT_RC) {
				err = EINVAL;
				break;
			}
			info.op_type = IRDMA_OP_TYPE_BIND_MW;
			info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey;
			if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) {
				info.op.bind_window.mem_window_type_1 = true;
				info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey;
			} else {
				struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr);
				struct irdma_umr *umr = container_of(vmr, struct irdma_umr, vmr);

				if (umr->acc_flags & IBV_ACCESS_ZERO_BASED) {
					err = EINVAL;
					break;
				}
				info.op.bind_window.mw_stag =
				    calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey);
				ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag;

			}

			if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) {
				info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED;
				info.op.bind_window.va = NULL;
			} else {
				info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED;
				info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr;
			}
			info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length;
			info.op.bind_window.ena_reads =
			    (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0;
			info.op.bind_window.ena_writes =
			    (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 1 : 0;

			err = irdma_uk_mw_bind(&iwuqp->qp, &info, false);
			break;
		case IBV_WR_LOCAL_INV:
			info.op_type = IRDMA_OP_TYPE_INV_STAG;
			info.op.inv_local_stag.target_stag = ib_wr->imm_data;
			err = irdma_uk_stag_local_invalidate(&iwuqp->qp, &info, true);
			break;
		default:
			/* error */
			err = EINVAL;
			printf("%s: post work request failed, invalid opcode: 0x%x\n",
			       __func__, ib_wr->opcode);
			break;
		}
		if (err)
			break;

		ib_wr = ib_wr->next;
	}

	if (err)
		*bad_wr = ib_wr;

	irdma_uk_qp_post_wr(&iwuqp->qp);
	if (reflush)
		irdma_issue_flush(ib_qp, 1, 0);

	pthread_spin_unlock(&iwuqp->lock);

	return err;
}
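
/*
 * Illustrative usage (not part of the provider): irdma_upost_send() backs
 * ibv_post_send(). A sketch of posting a signaled RDMA write, assuming a
 * registered local MR "mr", a remote address/rkey learned out of band, and
 * omitting error handling; the wr_id value is arbitrary:
 *
 *	struct ibv_sge sge = {
 *		.addr = (uintptr_t)mr->addr,
 *		.length = 4096,
 *		.lkey = mr->lkey,
 *	};
 *	struct ibv_send_wr wr = {
 *		.wr_id = 1,
 *		.opcode = IBV_WR_RDMA_WRITE,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *		.send_flags = IBV_SEND_SIGNALED,
 *		.wr.rdma = { .remote_addr = remote_addr, .rkey = remote_rkey },
 *	};
 *	struct ibv_send_wr *bad_wr;
 *
 *	ibv_post_send(qp, &wr, &bad_wr);
 */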

/**
 * irdma_ucreate_ah - create address handle associated with a pd
 * @ibpd: pd for the address handle
 * @attr: attributes of address handle
 */
struct ibv_ah *
irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr)
{
	struct irdma_uah *ah;
	union ibv_gid sgid;
	struct irdma_ucreate_ah_resp resp;
	int err;

	err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index,
			    &sgid);
	if (err) {
		fprintf(stderr, "irdma: Error from ibv_query_gid.\n");
		errno = err;
		return NULL;
	}

	ah = calloc(1, sizeof(*ah));
	if (!ah)
		return NULL;

	err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp,
				sizeof(resp));
	if (err) {
		free(ah);
		errno = err;
		return NULL;
	}

	ah->ah_id = resp.ah_id;

	return &ah->ibv_ah;
}

/**
 * irdma_udestroy_ah - destroy the address handle
 * @ibah: address handle
 */
int
irdma_udestroy_ah(struct ibv_ah *ibah)
{
	struct irdma_uah *ah;
	int ret;

	ah = container_of(ibah, struct irdma_uah, ibv_ah);

	ret = ibv_cmd_destroy_ah(ibah);
	if (ret)
		return ret;

	free(ah);

	return 0;
}

/**
 * irdma_uattach_mcast - attach a qp to a multicast group
 * @qp: the queue pair
 * @gid: the global ID of the multicast group
 * @lid: the local ID
 */
int
irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
		    uint16_t lid)
{
	return ibv_cmd_attach_mcast(qp, gid, lid);
}

/**
 * irdma_udetach_mcast - detach a qp from a multicast group
 * @qp: the queue pair
 * @gid: the global ID of the multicast group
 * @lid: the local ID
 */
int
irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
		    uint16_t lid)
{
	return ibv_cmd_detach_mcast(qp, gid, lid);
}
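
/*
 * Illustrative sketch (not part of the provider): for a UD QP an application
 * obtains an address handle through ibv_create_ah(), which dispatches to
 * irdma_ucreate_ah() above; the provider resolves the SGID before issuing the
 * create command. The pd, dgid, sgid_idx and port names are placeholders.
 *
 *	struct ibv_ah_attr ah_attr = {
 *		.is_global = 1,
 *		.grh.dgid = dgid,
 *		.grh.sgid_index = sgid_idx,
 *		.grh.hop_limit = 1,
 *		.port_num = port,
 *	};
 *	struct ibv_ah *ah = ibv_create_ah(pd, &ah_attr);
 *	if (!ah)
 *		return errno;		// e.g. bad sgid_index or command failure
 *	...
 *	ibv_destroy_ah(ah);		// frees the irdma_uah wrapper
 */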

/**
 * irdma_uresize_cq - resizes a cq
 * @cq: cq to resize
 * @cqe: the number of cqes of the new cq
 */
int
irdma_uresize_cq(struct ibv_cq *cq, int cqe)
{
	struct irdma_uvcontext *iwvctx;
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uresize_cq cmd = {};
	struct ibv_resize_cq_resp resp = {};
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	struct irdma_cq_buf *cq_buf = NULL;
	struct irdma_cqe *cq_base = NULL;
	struct verbs_mr new_mr = {};
	struct irdma_ucq *iwucq;
	size_t cq_size;
	u32 cq_pages;
	int cqe_needed;
	int ret = 0;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;

	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
		return EOPNOTSUPP;

	if (cqe > IRDMA_MAX_CQ_SIZE)
		return EINVAL;

	cqe_needed = cqe + 1;
	if (uk_attrs->hw_rev > IRDMA_GEN_1)
		cqe_needed *= 2;

	if (cqe_needed < IRDMA_U_MINCQ_SIZE)
		cqe_needed = IRDMA_U_MINCQ_SIZE;

	if (cqe_needed == iwucq->cq.cq_size)
		return 0;

	cq_size = get_cq_total_bytes(cqe_needed);
	cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT;
	cq_base = irdma_alloc_hw_buf(cq_size);
	if (!cq_base)
		return ENOMEM;

	memset(cq_base, 0, cq_size);

	cq_buf = malloc(sizeof(*cq_buf));
	if (!cq_buf) {
		ret = ENOMEM;
		goto err_buf;
	}

	new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
	reg_mr_cmd.cq_pages = cq_pages;

	ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
			     (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
			     &new_mr.ibv_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
			     &reg_mr_resp, sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		goto err_lock;

	cmd.user_cq_buffer = (__u64)(uintptr_t)cq_base;
	ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
				sizeof(cmd), &resp, sizeof(resp));
	if (ret)
		goto err_resize;

	memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
	cq_buf->vmr = iwucq->vmr;
	iwucq->vmr = new_mr;
	irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
	iwucq->verbs_cq.cq.cqe = cqe;
	LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list);

	pthread_spin_unlock(&iwucq->lock);

	return ret;

err_resize:
	pthread_spin_unlock(&iwucq->lock);
err_lock:
	ibv_cmd_dereg_mr(&new_mr.ibv_mr);
err_dereg_mr:
	free(cq_buf);
err_buf:
	fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret);
	irdma_free_hw_buf(cq_base, cq_size);
	return ret;
}
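
/*
 * Illustrative sketch (not part of the provider): CQ resize is driven through
 * the standard ibv_resize_cq() verb, which dispatches to irdma_uresize_cq()
 * above. The cq and new_depth names are placeholders; on hardware without
 * IRDMA_FEATURE_CQ_RESIZE the call fails with EOPNOTSUPP.
 *
 *	int rc = ibv_resize_cq(cq, new_depth);
 *	if (rc)
 *		fprintf(stderr, "resize_cq failed: %s\n", strerror(rc));
 */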