/*-
 * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
 *
 * Copyright (C) 2019 - 2022 Intel Corporation
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenFabrics.org BSD license below:
 *
 *   Redistribution and use in source and binary forms, with or
 *   without modification, are permitted provided that the following
 *   conditions are met:
 *
 *    - Redistributions of source code must retain the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer.
 *
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
/*$FreeBSD$*/

#include <config.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <errno.h>
#include <sys/param.h>
#include <sys/mman.h>
#include <netinet/in.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdbool.h>
#include <infiniband/opcode.h>

#include "irdma_umain.h"
#include "abi.h"

static inline void
print_fw_ver(uint64_t fw_ver, char *str, size_t len)
{
    uint16_t major, minor;

    major = fw_ver >> 32 & 0xffff;
    minor = fw_ver & 0xffff;

    snprintf(str, len, "%d.%d", major, minor);
}

/**
 * irdma_uquery_device_ex - query device attributes including extended properties
 * @context: user context for the device
 * @input: extensible input struct for ibv_query_device_ex verb
 * @attr: extended device attribute struct
 * @attr_size: size of extended device attribute struct
 **/
int
irdma_uquery_device_ex(struct ibv_context *context,
                       const struct ibv_query_device_ex_input *input,
                       struct ibv_device_attr_ex *attr, size_t attr_size)
{
    struct irdma_query_device_ex cmd = {};
    struct irdma_query_device_ex_resp resp = {};
    uint64_t fw_ver;
    int ret;

    ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver,
                                  &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd),
                                  &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp));
    if (ret)
        return ret;

    print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver));

    return 0;
}

/**
 * irdma_uquery_device - call driver to query device for max resources
 * @context: user context for the device
 * @attr: where to save all the max resources from the driver
 **/
int
irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
    struct ibv_query_device cmd;
    uint64_t fw_ver;
    int ret;

    ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd));
    if (ret)
        return ret;

    print_fw_ver(fw_ver, attr->fw_ver, sizeof(attr->fw_ver));

    return 0;
}
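
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * shows how an application would reach irdma_uquery_device_ex() through
 * the generic ibv_query_device_ex() entry point and read the firmware
 * version string formatted by print_fw_ver() above.  Error handling is
 * trimmed for brevity.
 */
#if 0
static void
example_query_fw_ver(struct ibv_context *ctx)
{
    struct ibv_device_attr_ex attr_ex = {};

    /* dispatches to irdma_uquery_device_ex() for irdma devices */
    if (!ibv_query_device_ex(ctx, NULL, &attr_ex))
        printf("fw ver: %s, max_qp: %d\n",
               attr_ex.orig_attr.fw_ver, attr_ex.orig_attr.max_qp);
}
#endif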

/**
 * irdma_uquery_port - get port attributes (msg size, lnk, mtu...)
 * @context: user context of the device
 * @port: port for the attributes
 * @attr: to return port attributes
 **/
int
irdma_uquery_port(struct ibv_context *context, uint8_t port,
                  struct ibv_port_attr *attr)
{
    struct ibv_query_port cmd;

    return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
}

/**
 * irdma_ualloc_pd - allocates protection domain and return pd ptr
 * @context: user context of the device
 **/
struct ibv_pd *
irdma_ualloc_pd(struct ibv_context *context)
{
    struct ibv_alloc_pd cmd;
    struct irdma_ualloc_pd_resp resp = {};
    struct irdma_upd *iwupd;
    int err;

    iwupd = calloc(1, sizeof(*iwupd));
    if (!iwupd)
        return NULL;

    err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd),
                           &resp.ibv_resp, sizeof(resp));
    if (err)
        goto err_free;

    iwupd->pd_id = resp.pd_id;

    return &iwupd->ibv_pd;

err_free:
    free(iwupd);
    errno = err;
    return NULL;
}

/**
 * irdma_ufree_pd - free pd resources
 * @pd: pd to free resources
 */
int
irdma_ufree_pd(struct ibv_pd *pd)
{
    struct irdma_uvcontext *iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx);
    struct irdma_upd *iwupd;
    int ret;

    iwupd = container_of(pd, struct irdma_upd, ibv_pd);
    ret = ibv_cmd_dealloc_pd(pd);
    if (ret)
        return ret;

    free(iwupd);

    return 0;
}

/**
 * irdma_ureg_mr - register user memory region
 * @pd: pd for the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
struct ibv_mr *
irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
              int access)
{
    struct verbs_mr *vmr;
    struct irdma_ureg_mr cmd = {};
    struct ibv_reg_mr_resp resp;
    int err;

    vmr = malloc(sizeof(*vmr));
    if (!vmr)
        return NULL;

    cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
    err = ibv_cmd_reg_mr(pd, addr, length,
                         (uintptr_t)addr, access, &vmr->ibv_mr, &cmd.ibv_cmd,
                         sizeof(cmd), &resp, sizeof(resp));
    if (err) {
        free(vmr);
        errno = err;
        return NULL;
    }

    return &vmr->ibv_mr;
}

/**
 * irdma_urereg_mr - re-register memory region
 * @vmr: mr that was allocated
 * @flags: bit mask to indicate which of the attr's of MR are modified
 * @pd: pd of the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
int
irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
                void *addr, size_t length, int access)
{
    struct irdma_urereg_mr cmd = {};
    struct ibv_rereg_mr_resp resp;

    cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
    return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr,
                            access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp,
                            sizeof(resp));
}

/**
 * irdma_udereg_mr - deregister memory region
 * @mr: mr that was allocated
 */
int
irdma_udereg_mr(struct ibv_mr *mr)
{
    struct verbs_mr *vmr;
    int ret;

    vmr = container_of(mr, struct verbs_mr, ibv_mr);

    ret = ibv_cmd_dereg_mr(mr);
    if (ret)
        return ret;

    return 0;
}
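
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * a minimal application-side MR lifetime that exercises irdma_ureg_mr()
 * and irdma_udereg_mr() through the generic verbs API.  The buffer size
 * and access flags are arbitrary example values.
 */
#if 0
static struct ibv_mr *
example_reg_mr(struct ibv_pd *pd, size_t len)
{
    void *buf = malloc(len);
    struct ibv_mr *mr;

    if (!buf)
        return NULL;
    /* dispatches to irdma_ureg_mr() */
    mr = ibv_reg_mr(pd, buf, len,
                    IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_WRITE);
    if (!mr)
        free(buf);
    return mr;  /* later: ibv_dereg_mr(mr); free(buf); */
}
#endif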

/**
 * irdma_ualloc_mw - allocate memory window
 * @pd: protection domain
 * @type: memory window type
 */
struct ibv_mw *
irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type)
{
    struct ibv_mw *mw;
    struct ibv_alloc_mw cmd;
    struct ibv_alloc_mw_resp resp;
    int err;

    mw = calloc(1, sizeof(*mw));
    if (!mw)
        return NULL;

    if (ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp,
                         sizeof(resp))) {
        printf("%s: Failed to alloc memory window\n",
               __func__);
        free(mw);
        return NULL;
    }

    return mw;
}

/**
 * irdma_ubind_mw - bind a memory window
 * @qp: qp to post WR
 * @mw: memory window to bind
 * @mw_bind: bind info
 */
int
irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw,
               struct ibv_mw_bind *mw_bind)
{
    struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info;
    struct verbs_mr *vmr;

    struct ibv_send_wr wr = {};
    struct ibv_send_wr *bad_wr;
    int err;

    if (!bind_info->mr && (bind_info->addr || bind_info->length))
        return EINVAL;

    if (bind_info->mr) {
        vmr = verbs_get_mr(bind_info->mr);
        if (vmr->mr_type != IBV_MR_TYPE_MR)
            return ENOTSUP;

        if (vmr->access & IBV_ACCESS_ZERO_BASED)
            return EINVAL;

        if (mw->pd != bind_info->mr->pd)
            return EPERM;
    }

    wr.opcode = IBV_WR_BIND_MW;
    wr.bind_mw.bind_info = mw_bind->bind_info;
    wr.bind_mw.mw = mw;
    wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey);

    wr.wr_id = mw_bind->wr_id;
    wr.send_flags = mw_bind->send_flags;

    err = irdma_upost_send(qp, &wr, &bad_wr);
    if (!err)
        mw->rkey = wr.bind_mw.rkey;

    return err;
}

/**
 * irdma_udealloc_mw - deallocate memory window
 * @mw: memory window to dealloc
 */
int
irdma_udealloc_mw(struct ibv_mw *mw)
{
    int ret;
    struct ibv_dealloc_mw cmd;

    ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd));
    if (ret)
        return ret;
    free(mw);

    return 0;
}
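
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * binding a type 1 memory window over a registered MR, which reaches
 * irdma_ualloc_mw() and irdma_ubind_mw() above.  Assumes the MR was
 * registered on the same PD; the length/flags are example values and
 * error paths are trimmed.
 */
#if 0
static int
example_bind_mw_type1(struct ibv_pd *pd, struct ibv_qp *qp, struct ibv_mr *mr)
{
    struct ibv_mw *mw = ibv_alloc_mw(pd, IBV_MW_TYPE_1);
    struct ibv_mw_bind bind = {
        .wr_id = 1,
        .send_flags = IBV_SEND_SIGNALED,
        .bind_info = {
            .mr = mr,
            .addr = (uintptr_t)mr->addr,
            .length = mr->length,
            .mw_access_flags = IBV_ACCESS_REMOTE_READ,
        },
    };

    if (!mw)
        return errno;
    /* posts an IBV_WR_BIND_MW WR and updates mw->rkey on success */
    return ibv_bind_mw(qp, mw, &bind);
}
#endif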

static void *
irdma_alloc_hw_buf(size_t size)
{
    void *buf;

    buf = memalign(IRDMA_HW_PAGE_SIZE, size);

    if (!buf)
        return NULL;
    if (ibv_dontfork_range(buf, size)) {
        free(buf);
        return NULL;
    }

    return buf;
}

static void
irdma_free_hw_buf(void *buf, size_t size)
{
    ibv_dofork_range(buf, size);
    free(buf);
}

/**
 * get_cq_size - returns actual cqe needed by HW
 * @ncqe: minimum cqes requested by application
 * @hw_rev: HW generation
 * @cqe_64byte_ena: enable 64byte cqe
 */
static inline int
get_cq_size(int ncqe, u8 hw_rev, bool cqe_64byte_ena)
{
    ncqe++;

    /* Completions with immediate require 1 extra entry */
    if (!cqe_64byte_ena && hw_rev > IRDMA_GEN_1)
        ncqe *= 2;

    if (ncqe < IRDMA_U_MINCQ_SIZE)
        ncqe = IRDMA_U_MINCQ_SIZE;

    return ncqe;
}

static inline size_t
get_cq_total_bytes(u32 cq_size, bool cqe_64byte_ena)
{
    if (cqe_64byte_ena)
        return roundup(cq_size * sizeof(struct irdma_extended_cqe), IRDMA_HW_PAGE_SIZE);
    else
        return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE);
}
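
/*
 * Worked example of the sizing helpers above (illustrative, compiled out):
 * the numbers assume a GEN_2 device without 64-byte CQEs, so the requested
 * depth is bumped by one for the extra immediate-data entry, doubled, and
 * the byte count is rounded up to a hardware page.
 */
#if 0
static void
example_cq_sizing(void)
{
    int hw_cqes = get_cq_size(64, IRDMA_GEN_2, false);   /* (64 + 1) * 2 = 130 */
    size_t bytes = get_cq_total_bytes(hw_cqes, false);   /* 130 * sizeof(struct irdma_cqe), page aligned */

    printf("cq_size=%d bytes=%zu\n", hw_cqes, bytes);
}
#endif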

/**
 * ucreate_cq - irdma util function to create a CQ
 * @context: ibv context
 * @attr_ex: CQ init attributes
 * @ext_cq: flag to create an extendable or normal CQ
 */
static struct ibv_cq_ex *
ucreate_cq(struct ibv_context *context,
           struct ibv_cq_init_attr_ex *attr_ex,
           bool ext_cq)
{
    struct irdma_cq_uk_init_info info = {};
    struct irdma_ureg_mr reg_mr_cmd = {};
    struct irdma_ucreate_cq_ex cmd = {};
    struct irdma_ucreate_cq_ex_resp resp = {};
    struct ibv_reg_mr_resp reg_mr_resp = {};
    struct irdma_ureg_mr reg_mr_shadow_cmd = {};
    struct ibv_reg_mr_resp reg_mr_shadow_resp = {};
    struct irdma_uk_attrs *uk_attrs;
    struct irdma_uvcontext *iwvctx;
    struct irdma_ucq *iwucq;
    size_t total_size;
    u32 cq_pages;
    int ret, ncqe;
    u8 hw_rev;
    bool cqe_64byte_ena;

    iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx);
    uk_attrs = &iwvctx->uk_attrs;
    hw_rev = uk_attrs->hw_rev;

    if (ext_cq) {
        u32 supported_flags = IRDMA_STANDARD_WC_FLAGS_EX;

        if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) {
            errno = EOPNOTSUPP;
            return NULL;
        }
    }

    if (attr_ex->cqe < uk_attrs->min_hw_cq_size || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) {
        errno = EINVAL;
        return NULL;
    }

    /* save the cqe requested by application */
    ncqe = attr_ex->cqe;

    iwucq = calloc(1, sizeof(*iwucq));
    if (!iwucq)
        return NULL;

    if (pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE)) {
        free(iwucq);
        return NULL;
    }

    cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? true : false;
    info.cq_size = get_cq_size(attr_ex->cqe, hw_rev, cqe_64byte_ena);
    iwucq->comp_vector = attr_ex->comp_vector;
    LIST_INIT(&iwucq->resize_list);
    LIST_INIT(&iwucq->cmpl_generated);
    total_size = get_cq_total_bytes(info.cq_size, cqe_64byte_ena);
    cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT;

    if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
        total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE;

    iwucq->buf_size = total_size;
    info.cq_base = irdma_alloc_hw_buf(total_size);
    if (!info.cq_base)
        goto err_cq_base;

    memset(info.cq_base, 0, total_size);
    reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
    reg_mr_cmd.cq_pages = cq_pages;

    ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base,
                         total_size, (uintptr_t)info.cq_base,
                         IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr,
                         &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
                         &reg_mr_resp, sizeof(reg_mr_resp));
    if (ret) {
        errno = ret;
        goto err_dereg_mr;
    }

    iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

    if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
        info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE);
        if (!info.shadow_area)
            goto err_alloc_shadow;

        memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE);
        reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
        reg_mr_shadow_cmd.cq_pages = 1;

        ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area,
                             IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
                             IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr,
                             &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
                             &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
        if (ret) {
            irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
            errno = ret;
            goto err_alloc_shadow;
        }

        iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

    } else {
        info.shadow_area = (__le64 *) ((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT));
    }

    attr_ex->cqe = info.cq_size;
    cmd.user_cq_buf = (__u64) ((uintptr_t)info.cq_base);
    cmd.user_shadow_area = (__u64) ((uintptr_t)info.shadow_area);

    ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex,
                               &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp,
                               sizeof(resp.ibv_resp), sizeof(resp));
    attr_ex->cqe = ncqe;
    if (ret) {
        errno = ret;
        goto err_create_cq;
    }

    if (ext_cq)
        irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex);
    info.cq_id = resp.cq_id;
    /* Do not report the CQE's reserved for immediate and burned by HW */
    iwucq->verbs_cq.cq.cqe = ncqe;
    if (cqe_64byte_ena)
        info.avoid_mem_cflct = true;
    info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET);
    irdma_uk_cq_init(&iwucq->cq, &info);
    return &iwucq->verbs_cq.cq_ex;

err_create_cq:
    if (iwucq->vmr_shadow_area.ibv_mr.handle) {
        ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
        irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
    }
err_alloc_shadow:
    ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
err_dereg_mr:
    irdma_free_hw_buf(info.cq_base, total_size);
err_cq_base:
    printf("%s: failed to initialize CQ\n", __func__);
    pthread_spin_destroy(&iwucq->lock);

    free(iwucq);

    return NULL;
}

struct ibv_cq *
irdma_ucreate_cq(struct ibv_context *context, int cqe,
                 struct ibv_comp_channel *channel,
                 int comp_vector)
{
    struct ibv_cq_init_attr_ex attr_ex = {
        .cqe = cqe,
        .channel = channel,
        .comp_vector = comp_vector,
    };
    struct ibv_cq_ex *ibvcq_ex;

    ibvcq_ex = ucreate_cq(context, &attr_ex, false);

    return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL;
}

struct ibv_cq_ex *
irdma_ucreate_cq_ex(struct ibv_context *context,
                    struct ibv_cq_init_attr_ex *attr_ex)
{
    return ucreate_cq(context, attr_ex, true);
}
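
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * creating an extended CQ that lands in ucreate_cq() with ext_cq = true.
 * The wc_flags below are examples and must stay within
 * IRDMA_STANDARD_WC_FLAGS_EX, otherwise the provider fails with EOPNOTSUPP.
 */
#if 0
static struct ibv_cq_ex *
example_create_cq_ex(struct ibv_context *ctx)
{
    struct ibv_cq_init_attr_ex attr_ex = {
        .cqe = 256,
        .comp_vector = 0,
        .wc_flags = IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_QP_NUM,
    };

    /* dispatches to irdma_ucreate_cq_ex() */
    return ibv_create_cq_ex(ctx, &attr_ex);
}
#endif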

/**
 * irdma_free_cq_buf - free memory for cq buffer
 * @cq_buf: cq buf to free
 */
static void
irdma_free_cq_buf(struct irdma_cq_buf *cq_buf)
{
    ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr);
    irdma_free_hw_buf(cq_buf->cq.cq_base, cq_buf->buf_size);
    free(cq_buf);
}

/**
 * irdma_process_resize_list - process the cq list to remove buffers
 * @iwucq: cq which owns the list
 * @lcqe_buf: cq buf where the last cqe is found
 */
static int
irdma_process_resize_list(struct irdma_ucq *iwucq,
                          struct irdma_cq_buf *lcqe_buf)
{
    struct irdma_cq_buf *cq_buf, *next;
    int cq_cnt = 0;

    LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) {
        if (cq_buf == lcqe_buf)
            return cq_cnt;

        LIST_REMOVE(cq_buf, list);
        irdma_free_cq_buf(cq_buf);
        cq_cnt++;
    }

    return cq_cnt;
}

static void
irdma_remove_cmpls_list(struct irdma_ucq *iwucq)
{
    struct irdma_cmpl_gen *cmpl_node, *next;

    LIST_FOREACH_SAFE(cmpl_node, &iwucq->cmpl_generated, list, next) {
        LIST_REMOVE(cmpl_node, list);
        free(cmpl_node);
    }
}

static int
irdma_generated_cmpls(struct irdma_ucq *iwucq, struct irdma_cq_poll_info *cq_poll_info)
{
    struct irdma_cmpl_gen *cmpl;

    if (!iwucq || LIST_EMPTY(&iwucq->cmpl_generated))
        return ENOENT;
    cmpl = LIST_FIRST(&iwucq->cmpl_generated);
    LIST_REMOVE(cmpl, list);
    memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info));

    free(cmpl);

    return 0;
}

/**
 * irdma_set_cpi_common_values - fill in values for polling info struct
 * @cpi: resulting structure of cq_poll_info type
 * @qp: QPair
 * @qp_num: id of the QP
 */
static void
irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi,
                            struct irdma_qp_uk *qp, __u32 qp_num)
{
    cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED;
    cpi->error = 1;
    cpi->major_err = IRDMA_FLUSH_MAJOR_ERR;
    cpi->minor_err = FLUSH_GENERAL_ERR;
    cpi->qp_handle = (irdma_qp_handle) (uintptr_t)qp;
    cpi->qp_id = qp_num;
}

static bool
irdma_cq_empty(struct irdma_ucq *iwucq)
{
    struct irdma_cq_uk *ukcq;
    __u64 qword3;
    __le64 *cqe;
    __u8 polarity;

    ukcq = &iwucq->cq;
    cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
    get_64bit_val(cqe, 24, &qword3);
    polarity = (__u8) FIELD_GET(IRDMA_CQ_VALID, qword3);

    return polarity != ukcq->polarity;
}

/**
 * irdma_generate_flush_completions - generate completion from WRs
 * @iwuqp: pointer to QP
 */
static void
irdma_generate_flush_completions(struct irdma_uqp *iwuqp)
{
    struct irdma_qp_uk *qp = &iwuqp->qp;
    struct irdma_ring *sq_ring = &qp->sq_ring;
    struct irdma_ring *rq_ring = &qp->rq_ring;
    struct irdma_cmpl_gen *cmpl;
    __le64 *sw_wqe;
    __u64 wqe_qword;
    __u32 wqe_idx;

    if (pthread_spin_lock(&iwuqp->send_cq->lock))
        return;
    if (irdma_cq_empty(iwuqp->send_cq)) {
        while (IRDMA_RING_MORE_WORK(*sq_ring)) {
            cmpl = malloc(sizeof(*cmpl));
            if (!cmpl) {
                pthread_spin_unlock(&iwuqp->send_cq->lock);
                return;
            }

            wqe_idx = sq_ring->tail;
            irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
            cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid;
            sw_wqe = qp->sq_base[wqe_idx].elem;
            get_64bit_val(sw_wqe, 24, &wqe_qword);
            cmpl->cpi.op_type = (__u8) FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
            /* remove the SQ WR by moving SQ tail */
            IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
            LIST_INSERT_HEAD(&iwuqp->send_cq->cmpl_generated, cmpl, list);
        }
    }
    pthread_spin_unlock(&iwuqp->send_cq->lock);
    if (pthread_spin_lock(&iwuqp->recv_cq->lock))
        return;
    if (irdma_cq_empty(iwuqp->recv_cq)) {
        while (IRDMA_RING_MORE_WORK(*rq_ring)) {
            cmpl = malloc(sizeof(*cmpl));
            if (!cmpl) {
                pthread_spin_unlock(&iwuqp->recv_cq->lock);
                return;
            }

            wqe_idx = rq_ring->tail;
            irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id);
            cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
            cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
            /* remove the RQ WR by moving RQ tail */
            IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
            LIST_INSERT_HEAD(&iwuqp->recv_cq->cmpl_generated, cmpl, list);
        }
    }
    pthread_spin_unlock(&iwuqp->recv_cq->lock);
}

void *
irdma_flush_thread(void *arg)
{
    __u8 i = 5;
    struct irdma_uqp *iwuqp = arg;

    while (--i) {
        if (pthread_spin_lock(&iwuqp->lock))
            break;
        irdma_generate_flush_completions(arg);
        pthread_spin_unlock(&iwuqp->lock);
        sleep(1);
    }
    pthread_exit(NULL);
}

/**
 * irdma_udestroy_cq - destroys cq
 * @cq: ptr to cq to be destroyed
 */
int
irdma_udestroy_cq(struct ibv_cq *cq)
{
    struct irdma_uk_attrs *uk_attrs;
    struct irdma_uvcontext *iwvctx;
    struct irdma_ucq *iwucq;
    int ret;

    iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
    iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
    uk_attrs = &iwvctx->uk_attrs;

    ret = pthread_spin_destroy(&iwucq->lock);
    if (ret)
        goto err;

    if (!LIST_EMPTY(&iwucq->cmpl_generated))
        irdma_remove_cmpls_list(iwucq);
    irdma_process_resize_list(iwucq, NULL);
    ret = ibv_cmd_destroy_cq(cq);
    if (ret)
        goto err;

    ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
    irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size);

    if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
        ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
        irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
    }
    free(iwucq);
    return 0;

err:
    return ret;
}

static enum ibv_wc_status
irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode)
{
    switch (opcode) {
    case FLUSH_PROT_ERR:
        return IBV_WC_LOC_PROT_ERR;
    case FLUSH_REM_ACCESS_ERR:
        return IBV_WC_REM_ACCESS_ERR;
    case FLUSH_LOC_QP_OP_ERR:
        return IBV_WC_LOC_QP_OP_ERR;
    case FLUSH_REM_OP_ERR:
        return IBV_WC_REM_OP_ERR;
    case FLUSH_LOC_LEN_ERR:
        return IBV_WC_LOC_LEN_ERR;
    case FLUSH_GENERAL_ERR:
        return IBV_WC_WR_FLUSH_ERR;
    case FLUSH_MW_BIND_ERR:
        return IBV_WC_MW_BIND_ERR;
    case FLUSH_REM_INV_REQ_ERR:
        return IBV_WC_REM_INV_REQ_ERR;
    case FLUSH_RETRY_EXC_ERR:
        return IBV_WC_RETRY_EXC_ERR;
    case FLUSH_FATAL_ERR:
    default:
        return IBV_WC_FATAL_ERR;
    }
}

static inline void
set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry)
{
    switch (cur_cqe->op_type) {
    case IRDMA_OP_TYPE_RDMA_WRITE:
    case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
        entry->opcode = IBV_WC_RDMA_WRITE;
        break;
    case IRDMA_OP_TYPE_RDMA_READ:
        entry->opcode = IBV_WC_RDMA_READ;
        break;
    case IRDMA_OP_TYPE_SEND_SOL:
    case IRDMA_OP_TYPE_SEND_SOL_INV:
    case IRDMA_OP_TYPE_SEND_INV:
    case IRDMA_OP_TYPE_SEND:
        entry->opcode = IBV_WC_SEND;
        break;
    case IRDMA_OP_TYPE_BIND_MW:
        entry->opcode = IBV_WC_BIND_MW;
        break;
    case IRDMA_OP_TYPE_INV_STAG:
        entry->opcode = IBV_WC_LOCAL_INV;
        break;
    default:
        entry->status = IBV_WC_GENERAL_ERR;
        printf("%s: Invalid opcode = %d in CQE\n",
               __func__, cur_cqe->op_type);
    }
}

static inline void
set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe,
                struct ibv_wc *entry, bool send_imm_support)
{
    if (!send_imm_support) {
        entry->opcode = cur_cqe->imm_valid ? IBV_WC_RECV_RDMA_WITH_IMM :
            IBV_WC_RECV;
        return;
    }
    switch (cur_cqe->op_type) {
    case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
    case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
        entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
        break;
    default:
        entry->opcode = IBV_WC_RECV;
    }
}

/**
 * irdma_process_cqe_ext - process current cqe for extended CQ
 * @cur_cqe - current cqe info
 */
static void
irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe)
{
    struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe);
    struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;

    ibvcq_ex->wr_id = cur_cqe->wr_id;
    if (cur_cqe->error)
        ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
            irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR;
    else
        ibvcq_ex->status = IBV_WC_SUCCESS;
}

/**
 * irdma_process_cqe - process current cqe info
 * @entry - ibv_wc object to fill in for non-extended CQ
 * @cur_cqe - current cqe info
 */
static void
irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe)
{
    struct irdma_qp_uk *qp;
    struct ibv_qp *ib_qp;

    entry->wc_flags = 0;
    entry->wr_id = cur_cqe->wr_id;
    entry->qp_num = cur_cqe->qp_id;
    qp = cur_cqe->qp_handle;
    ib_qp = qp->back_qp;

    if (cur_cqe->error) {
        entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ?
            irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR;
        entry->vendor_err = cur_cqe->major_err << 16 |
            cur_cqe->minor_err;
    } else {
        entry->status = IBV_WC_SUCCESS;
    }

    if (cur_cqe->imm_valid) {
        entry->imm_data = htonl(cur_cqe->imm_data);
        entry->wc_flags |= IBV_WC_WITH_IMM;
    }

    if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) {
        set_ib_wc_op_sq(cur_cqe, entry);
    } else {
        set_ib_wc_op_rq(cur_cqe, entry,
                        qp->qp_caps & IRDMA_SEND_WITH_IMM ?
                        true : false);
        if (ib_qp->qp_type != IBV_QPT_UD &&
            cur_cqe->stag_invalid_set) {
            entry->invalidated_rkey = cur_cqe->inv_stag;
            entry->wc_flags |= IBV_WC_WITH_INV;
        }
    }

    if (ib_qp->qp_type == IBV_QPT_UD) {
        entry->src_qp = cur_cqe->ud_src_qpn;
        entry->wc_flags |= IBV_WC_GRH;
    } else {
        entry->src_qp = cur_cqe->qp_id;
    }
    entry->byte_len = cur_cqe->bytes_xfered;
}

/**
 * irdma_poll_one - poll one entry of the CQ
 * @ukcq: ukcq to poll
 * @cur_cqe: current CQE info to be filled in
 * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ
 *
 * Returns the internal irdma device error code or 0 on success
 */
static int
irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe,
               struct ibv_wc *entry)
{
    int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe);

    if (ret)
        return ret;

    if (!entry)
        irdma_process_cqe_ext(cur_cqe);
    else
        irdma_process_cqe(entry, cur_cqe);

    return 0;
}

/**
 * __irdma_upoll_cq - irdma util function to poll device CQ
 * @iwucq: irdma cq to poll
 * @num_entries: max cq entries to poll
 * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ
 *
 * Returns non-negative value equal to the number of completions
 * found. On failure, EINVAL
 */
static int
__irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries,
                 struct ibv_wc *entry)
{
    struct irdma_cq_buf *cq_buf, *next;
    struct irdma_cq_buf *last_buf = NULL;
    struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe;
    bool cq_new_cqe = false;
    int resized_bufs = 0;
    int npolled = 0;
    int ret;

    /* go through the list of previously resized CQ buffers */
    LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) {
        while (npolled < num_entries) {
            ret = irdma_poll_one(&cq_buf->cq, cur_cqe,
                                 entry ? entry + npolled : NULL);
            if (!ret) {
                ++npolled;
                cq_new_cqe = true;
                continue;
            }
            if (ret == ENOENT)
                break;
            /* QP using the CQ is destroyed. Skip reporting this CQE */
            if (ret == EFAULT) {
                cq_new_cqe = true;
                continue;
            }
            goto error;
        }

        /* save the resized CQ buffer which received the last cqe */
        if (cq_new_cqe)
            last_buf = cq_buf;
        cq_new_cqe = false;
    }

    /* check the current CQ for new cqes */
    while (npolled < num_entries) {
        ret = irdma_poll_one(&iwucq->cq, cur_cqe,
                             entry ? entry + npolled : NULL);
        if (ret == ENOENT) {
            ret = irdma_generated_cmpls(iwucq, cur_cqe);
            if (!ret) {
                if (entry)
                    irdma_process_cqe(entry + npolled, cur_cqe);
                else
                    irdma_process_cqe_ext(cur_cqe);
            }
        }
        if (!ret) {
            ++npolled;
            cq_new_cqe = true;
            continue;
        }
        if (ret == ENOENT)
            break;
        /* QP using the CQ is destroyed. Skip reporting this CQE */
        if (ret == EFAULT) {
            cq_new_cqe = true;
            continue;
        }
        goto error;
    }

    if (cq_new_cqe)
        /* all previous CQ resizes are complete */
        resized_bufs = irdma_process_resize_list(iwucq, NULL);
    else if (last_buf)
        /* only CQ resizes up to the last_buf are complete */
        resized_bufs = irdma_process_resize_list(iwucq, last_buf);
    if (resized_bufs)
        /* report to the HW the number of complete CQ resizes */
        irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs);

    return npolled;

error:
    printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret);

    return EINVAL;
}

/**
 * irdma_upoll_cq - verb API callback to poll device CQ
 * @cq: ibv_cq to poll
 * @num_entries: max cq entries to poll
 * @entry: pointer to array of ibv_wc objects to be filled in for each completion
 *
 * Returns non-negative value equal to the number of completions
 * found and a negative error code on failure
 */
int
irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry)
{
    struct irdma_ucq *iwucq;
    int ret;

    iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
    ret = pthread_spin_lock(&iwucq->lock);
    if (ret)
        return -ret;

    ret = __irdma_upoll_cq(iwucq, num_entries, entry);

    pthread_spin_unlock(&iwucq->lock);

    return ret;
}
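
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * a plain polling loop that ends up in irdma_upoll_cq().  The batch size
 * of 8 is an arbitrary example value.
 */
#if 0
static int
example_poll_cq(struct ibv_cq *cq)
{
    struct ibv_wc wc[8];
    int n, i;

    do {
        n = ibv_poll_cq(cq, 8, wc);  /* dispatches to irdma_upoll_cq() */
        for (i = 0; i < n; i++)
            if (wc[i].status != IBV_WC_SUCCESS)
                return -1;
    } while (n > 0);

    return n;  /* 0 when drained, negative on poll failure */
}
#endif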

/**
 * irdma_start_poll - verb_ex API callback to poll batch of WC's
 * @ibvcq_ex: ibv extended CQ
 * @attr: attributes (not used)
 *
 * Start polling batch of work completions. Return 0 on success, ENOENT when
 * no completions are available on CQ. And an error code on errors
 */
static int
irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr)
{
    struct irdma_ucq *iwucq;
    int ret;

    iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
    ret = pthread_spin_lock(&iwucq->lock);
    if (ret)
        return ret;

    ret = __irdma_upoll_cq(iwucq, 1, NULL);
    if (ret == 1)
        return 0;

    /* No Completions on CQ */
    if (!ret)
        ret = ENOENT;

    pthread_spin_unlock(&iwucq->lock);

    return ret;
}

/**
 * irdma_next_poll - verb_ex API callback to get next WC
 * @ibvcq_ex: ibv extended CQ
 *
 * Return 0 on success, ENOENT when no completions are available on CQ.
 * And an error code on errors
 */
static int
irdma_next_poll(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_ucq *iwucq;
    int ret;

    iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
    ret = __irdma_upoll_cq(iwucq, 1, NULL);
    if (ret == 1)
        return 0;

    /* No Completions on CQ */
    if (!ret)
        ret = ENOENT;

    return ret;
}

/**
 * irdma_end_poll - verb_ex API callback to end polling of WC's
 * @ibvcq_ex: ibv extended CQ
 */
static void
irdma_end_poll(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
                                           verbs_cq.cq_ex);

    pthread_spin_unlock(&iwucq->lock);
}

static enum ibv_wc_opcode
irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
                                           verbs_cq.cq_ex);

    switch (iwucq->cur_cqe.op_type) {
    case IRDMA_OP_TYPE_RDMA_WRITE:
    case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
        return IBV_WC_RDMA_WRITE;
    case IRDMA_OP_TYPE_RDMA_READ:
        return IBV_WC_RDMA_READ;
    case IRDMA_OP_TYPE_SEND_SOL:
    case IRDMA_OP_TYPE_SEND_SOL_INV:
    case IRDMA_OP_TYPE_SEND_INV:
    case IRDMA_OP_TYPE_SEND:
        return IBV_WC_SEND;
    case IRDMA_OP_TYPE_BIND_MW:
        return IBV_WC_BIND_MW;
    case IRDMA_OP_TYPE_REC:
        return IBV_WC_RECV;
    case IRDMA_OP_TYPE_REC_IMM:
        return IBV_WC_RECV_RDMA_WITH_IMM;
    case IRDMA_OP_TYPE_INV_STAG:
        return IBV_WC_LOCAL_INV;
    }

    printf("%s: Invalid opcode = %d in CQE\n", __func__,
           iwucq->cur_cqe.op_type);

    return 0;
}

static uint32_t
irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_cq_poll_info *cur_cqe;
    struct irdma_ucq *iwucq;

    iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
    cur_cqe = &iwucq->cur_cqe;

    return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0;
}

static int
irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_cq_poll_info *cur_cqe;
    struct irdma_ucq *iwucq;
    struct irdma_qp_uk *qp;
    struct ibv_qp *ib_qp;
    int wc_flags = 0;

    iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
    cur_cqe = &iwucq->cur_cqe;
    qp = cur_cqe->qp_handle;
    ib_qp = qp->back_qp;

    if (cur_cqe->imm_valid)
        wc_flags |= IBV_WC_WITH_IMM;

    if (ib_qp->qp_type == IBV_QPT_UD) {
        wc_flags |= IBV_WC_GRH;
    } else {
        if (cur_cqe->stag_invalid_set) {
            switch (cur_cqe->op_type) {
            case IRDMA_OP_TYPE_REC:
                wc_flags |= IBV_WC_WITH_INV;
                break;
            case IRDMA_OP_TYPE_REC_IMM:
                wc_flags |= IBV_WC_WITH_INV;
                break;
            }
        }
    }

    return wc_flags;
}

static uint32_t
irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
                                           verbs_cq.cq_ex);

    return iwucq->cur_cqe.bytes_xfered;
}

static __be32
irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_cq_poll_info *cur_cqe;
    struct irdma_ucq *iwucq;

    iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
    cur_cqe = &iwucq->cur_cqe;

    return cur_cqe->imm_valid ? htonl(cur_cqe->imm_data) : 0;
}

static uint32_t
irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq,
                                           verbs_cq.cq_ex);

    return iwucq->cur_cqe.qp_id;
}

static uint32_t
irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex)
{
    struct irdma_cq_poll_info *cur_cqe;
    struct irdma_ucq *iwucq;
    struct irdma_qp_uk *qp;
    struct ibv_qp *ib_qp;

    iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
    cur_cqe = &iwucq->cur_cqe;
    qp = cur_cqe->qp_handle;
    ib_qp = qp->back_qp;

    return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn : cur_cqe->qp_id;
}

static uint8_t
irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex)
{
    return 0;
}

void
irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq,
                               struct ibv_cq_init_attr_ex *attr_ex)
{
    struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex;

    ibvcq_ex->start_poll = irdma_start_poll;
    ibvcq_ex->end_poll = irdma_end_poll;
    ibvcq_ex->next_poll = irdma_next_poll;

    ibvcq_ex->read_opcode = irdma_wc_read_opcode;
    ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err;
    ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags;

    if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN)
        ibvcq_ex->read_byte_len = irdma_wc_read_byte_len;
    if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM)
        ibvcq_ex->read_imm_data = irdma_wc_read_imm_data;
    if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM)
        ibvcq_ex->read_qp_num = irdma_wc_read_qp_num;
    if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP)
        ibvcq_ex->read_src_qp = irdma_wc_read_src_qp;
    if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL)
        ibvcq_ex->read_sl = irdma_wc_read_sl;
}
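
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * draining an extended CQ through the start/next/end_poll callbacks
 * installed by irdma_ibvcq_ex_fill_priv_funcs().  Assumes the CQ was
 * created with IBV_WC_EX_WITH_BYTE_LEN so read_byte_len is valid.
 */
#if 0
static void
example_poll_cq_ex(struct ibv_cq_ex *cq_ex)
{
    struct ibv_poll_cq_attr attr = {};
    int ret;

    ret = ibv_start_poll(cq_ex, &attr);  /* irdma_start_poll() */
    if (ret)
        return;  /* ENOENT: CQ empty, end_poll must not be called */

    do {
        printf("wr_id=%llu status=%d opcode=%d len=%u\n",
               (unsigned long long)cq_ex->wr_id, cq_ex->status,
               ibv_wc_read_opcode(cq_ex), ibv_wc_read_byte_len(cq_ex));
        ret = ibv_next_poll(cq_ex);      /* irdma_next_poll() */
    } while (!ret);

    ibv_end_poll(cq_ex);                 /* irdma_end_poll() */
}
#endif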

/**
 * irdma_arm_cq - arm the cq
 * @iwucq: cq to arm
 * @cq_notify: notification params
 */
static void
irdma_arm_cq(struct irdma_ucq *iwucq,
             enum irdma_cmpl_notify cq_notify)
{
    iwucq->is_armed = true;
    iwucq->arm_sol = true;
    iwucq->skip_arm = false;
    iwucq->skip_sol = true;
    irdma_uk_cq_request_notification(&iwucq->cq, cq_notify);
}

/**
 * irdma_uarm_cq - callback for arm of cq
 * @cq: cq to arm
 * @solicited: to get notify params
 */
int
irdma_uarm_cq(struct ibv_cq *cq, int solicited)
{
    struct irdma_ucq *iwucq;
    enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT;
    int ret;

    iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
    if (solicited)
        cq_notify = IRDMA_CQ_COMPL_SOLICITED;

    ret = pthread_spin_lock(&iwucq->lock);
    if (ret)
        return ret;

    if (iwucq->is_armed) {
        if (iwucq->arm_sol && !solicited) {
            irdma_arm_cq(iwucq, cq_notify);
        } else {
            iwucq->skip_arm = true;
            iwucq->skip_sol = solicited ? true : false;
        }
    } else {
        irdma_arm_cq(iwucq, cq_notify);
    }

    pthread_spin_unlock(&iwucq->lock);

    return 0;
}

/**
 * irdma_cq_event - handle cq completion event
 * @cq: cq that received a completion event
 */
void
irdma_cq_event(struct ibv_cq *cq)
{
    struct irdma_ucq *iwucq;

    iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
    if (pthread_spin_lock(&iwucq->lock))
        return;

    if (iwucq->skip_arm)
        irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT);
    else
        iwucq->is_armed = false;

    pthread_spin_unlock(&iwucq->lock);
}

void *
irdma_mmap(int fd, off_t offset)
{
    void *map;

    map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
               fd, offset);
    if (map == MAP_FAILED)
        return map;

    if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) {
        munmap(map, IRDMA_HW_PAGE_SIZE);
        return MAP_FAILED;
    }

    return map;
}

void
irdma_munmap(void *map)
{
    ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE);
    munmap(map, IRDMA_HW_PAGE_SIZE);
}
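
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * the classic arm-and-wait loop around irdma_uarm_cq() and irdma_cq_event().
 * Assumes the CQ was created on a completion channel and that the caller
 * drains it with ibv_poll_cq() after each event.
 */
#if 0
static int
example_wait_for_completion(struct ibv_comp_channel *ch, struct ibv_cq *cq)
{
    struct ibv_cq *ev_cq;
    void *ev_ctx;

    if (ibv_req_notify_cq(cq, 0))               /* irdma_uarm_cq() */
        return -1;
    if (ibv_get_cq_event(ch, &ev_cq, &ev_ctx))  /* wakes up on the armed event */
        return -1;
    ibv_ack_cq_events(ev_cq, 1);
    return ibv_req_notify_cq(ev_cq, 0);         /* re-arm before polling again */
}
#endif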

/**
 * irdma_destroy_vmapped_qp - destroy resources for qp
 * @iwuqp: qp struct for resources
 */
static int
irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp)
{
    int ret;

    ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp);
    if (ret)
        return ret;

    if (iwuqp->qp.push_db)
        irdma_munmap(iwuqp->qp.push_db);
    if (iwuqp->qp.push_wqe)
        irdma_munmap(iwuqp->qp.push_wqe);

    ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);

    return 0;
}

/**
 * irdma_vmapped_qp - create resources for qp
 * @iwuqp: qp struct for resources
 * @pd: pd for the qp
 * @attr: attributes of qp passed
 * @info: uk info for initializing user level qp
 * @legacy_mode: indicates the provider is running in legacy mode
 */
static int
irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd,
                 struct ibv_qp_init_attr *attr,
                 struct irdma_qp_uk_init_info *info,
                 bool legacy_mode)
{
    struct irdma_ucreate_qp cmd = {};
    size_t sqsize, rqsize, totalqpsize;
    struct irdma_ucreate_qp_resp resp = {};
    struct irdma_ureg_mr reg_mr_cmd = {};
    struct ibv_reg_mr_resp reg_mr_resp = {};
    int ret;

    sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
    rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
    totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE;
    info->sq = irdma_alloc_hw_buf(totalqpsize);
    iwuqp->buf_size = totalqpsize;

    if (!info->sq)
        return ENOMEM;

    memset(info->sq, 0, totalqpsize);
    info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE];
    info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem;

    reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP;
    reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT;
    reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT;

    ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize,
                         (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE,
                         &iwuqp->vmr.ibv_mr, &reg_mr_cmd.ibv_cmd,
                         sizeof(reg_mr_cmd), &reg_mr_resp,
                         sizeof(reg_mr_resp));
    if (ret)
        goto err_dereg_mr;

    cmd.user_wqe_bufs = (__u64) ((uintptr_t)info->sq);
    cmd.user_compl_ctx = (__u64) (uintptr_t)&iwuqp->qp;
    ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd,
                            sizeof(cmd), &resp.ibv_resp,
                            sizeof(struct irdma_ucreate_qp_resp));
    if (ret)
        goto err_qp;

    info->sq_size = resp.actual_sq_size;
    info->rq_size = resp.actual_rq_size;
    info->first_sq_wq = legacy_mode ? 1 : resp.lsmm;
    info->qp_caps = resp.qp_caps;
    info->qp_id = resp.qp_id;
    iwuqp->irdma_drv_opt = resp.irdma_drv_opt;
    iwuqp->ibv_qp.qp_num = resp.qp_id;

    iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq,
                                  verbs_cq.cq);
    iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq,
                                  verbs_cq.cq);
    iwuqp->send_cq->uqp = iwuqp;
    iwuqp->recv_cq->uqp = iwuqp;

    return 0;
err_qp:
    ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);
err_dereg_mr:
    printf("%s: failed to create QP, status %d\n", __func__, ret);
    irdma_free_hw_buf(info->sq, iwuqp->buf_size);
    return ret;
}

/**
 * irdma_ucreate_qp - create qp on user app
 * @pd: pd for the qp
 * @attr: attributes of the qp to be created (sizes, sge, cq)
 */
struct ibv_qp *
irdma_ucreate_qp(struct ibv_pd *pd,
                 struct ibv_qp_init_attr *attr)
{
    struct irdma_qp_uk_init_info info = {};
    struct irdma_uk_attrs *uk_attrs;
    struct irdma_uvcontext *iwvctx;
    struct irdma_uqp *iwuqp;
    int status;

    if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) {
        printf("%s: failed to create QP, unsupported QP type: 0x%x\n",
               __func__, attr->qp_type);
        errno = EOPNOTSUPP;
        return NULL;
    }

    iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx);
    uk_attrs = &iwvctx->uk_attrs;

    if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
        attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
        attr->cap.max_inline_data > uk_attrs->max_hw_inline) {
        errno = EINVAL;
        return NULL;
    }

    info.uk_attrs = uk_attrs;
    info.sq_size = attr->cap.max_send_wr;
    info.rq_size = attr->cap.max_recv_wr;
    info.max_sq_frag_cnt = attr->cap.max_send_sge;
    info.max_rq_frag_cnt = attr->cap.max_recv_sge;
    info.max_inline_data = attr->cap.max_inline_data;
    info.abi_ver = iwvctx->abi_ver;

    status = irdma_uk_calc_depth_shift_sq(&info, &info.sq_depth, &info.sq_shift);
    if (status) {
        printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n",
               __func__, attr->cap.max_send_wr, attr->cap.max_send_sge,
               attr->cap.max_inline_data);
        errno = status;
        return NULL;
    }

    status = irdma_uk_calc_depth_shift_rq(&info, &info.rq_depth, &info.rq_shift);
    if (status) {
        printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n",
               __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge);
        errno = status;
        return NULL;
    }

    iwuqp = memalign(1024, sizeof(*iwuqp));
    if (!iwuqp)
        return NULL;

    memset(iwuqp, 0, sizeof(*iwuqp));

    if (pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE))
        goto err_free_qp;

    info.sq_size = info.sq_depth >> info.sq_shift;
    info.rq_size = info.rq_depth >> info.rq_shift;
    /*
     * For older ABI versions (less than 6) the application passes raw
     * SQ and RQ quanta in cap.max_send_wr and cap.max_recv_wr, and the
     * kernel has no way of calculating the actual QP size; report the
     * computed sizes back in that case.
     */
    if (iwvctx->abi_ver <= 5) {
        attr->cap.max_send_wr = info.sq_size;
        attr->cap.max_recv_wr = info.rq_size;
    }

    iwuqp->recv_sges = calloc(attr->cap.max_recv_sge, sizeof(*iwuqp->recv_sges));
    if (!iwuqp->recv_sges)
        goto err_destroy_lock;

    info.wqe_alloc_db = (u32 *)iwvctx->db;
    info.legacy_mode = iwvctx->legacy_mode;
    info.sq_wrtrk_array = calloc(info.sq_depth, sizeof(*info.sq_wrtrk_array));
    if (!info.sq_wrtrk_array)
        goto err_free_rsges;

    info.rq_wrid_array = calloc(info.rq_depth, sizeof(*info.rq_wrid_array));
    if (!info.rq_wrid_array)
        goto err_free_sq_wrtrk;

    iwuqp->sq_sig_all = attr->sq_sig_all;
    iwuqp->qp_type = attr->qp_type;
    status = irdma_vmapped_qp(iwuqp, pd, attr, &info, iwvctx->legacy_mode);
    if (status) {
        errno = status;
        goto err_free_rq_wrid;
    }

    iwuqp->qp.back_qp = iwuqp;
    iwuqp->qp.lock = &iwuqp->lock;

    status = irdma_uk_qp_init(&iwuqp->qp, &info);
    if (status) {
        errno = status;
        goto err_free_vmap_qp;
    }

    attr->cap.max_send_wr = (info.sq_depth - IRDMA_SQ_RSVD) >> info.sq_shift;
    attr->cap.max_recv_wr = (info.rq_depth - IRDMA_RQ_RSVD) >> info.rq_shift;

    return &iwuqp->ibv_qp;

err_free_vmap_qp:
    irdma_destroy_vmapped_qp(iwuqp);
    irdma_free_hw_buf(info.sq, iwuqp->buf_size);
err_free_rq_wrid:
    free(info.rq_wrid_array);
err_free_sq_wrtrk:
    free(info.sq_wrtrk_array);
err_free_rsges:
    free(iwuqp->recv_sges);
err_destroy_lock:
    pthread_spin_destroy(&iwuqp->lock);
err_free_qp:
    printf("%s: failed to create QP\n", __func__);
    free(iwuqp);

    return NULL;
}
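
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * creating an RC QP that ends up in irdma_ucreate_qp().  The capability
 * values are arbitrary examples; on return the cap fields are updated to
 * the depths actually provisioned (minus reserved WQEs), as done above.
 */
#if 0
static struct ibv_qp *
example_create_qp(struct ibv_pd *pd, struct ibv_cq *cq)
{
    struct ibv_qp_init_attr attr = {
        .send_cq = cq,
        .recv_cq = cq,
        .qp_type = IBV_QPT_RC,
        .cap = {
            .max_send_wr = 64,
            .max_recv_wr = 64,
            .max_send_sge = 2,
            .max_recv_sge = 2,
            .max_inline_data = 64,
        },
    };

    return ibv_create_qp(pd, &attr);  /* dispatches to irdma_ucreate_qp() */
}
#endif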

/**
 * irdma_uquery_qp - query qp for some attribute
 * @qp: qp for the attributes query
 * @attr: to return the attributes
 * @attr_mask: mask of what is query for
 * @init_attr: initial attributes during create_qp
 */
int
irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
                struct ibv_qp_init_attr *init_attr)
{
    struct ibv_query_qp cmd;

    return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd,
                            sizeof(cmd));
}

/**
 * irdma_umodify_qp - send qp modify to driver
 * @qp: qp to modify
 * @attr: attribute to modify
 * @attr_mask: mask of the attribute
 */
int
irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask)
{
    struct irdma_umodify_qp_resp resp = {};
    struct ibv_modify_qp cmd = {};
    struct irdma_modify_qp_cmd cmd_ex = {};
    struct irdma_uvcontext *iwvctx;
    struct irdma_uqp *iwuqp;

    iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
    iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx);

    if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE &&
        iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) {
        u64 offset;
        void *map;
        int ret;

        ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd,
                                   sizeof(cmd_ex.ibv_cmd),
                                   sizeof(cmd_ex), &resp.ibv_resp,
                                   sizeof(resp.ibv_resp),
                                   sizeof(resp));
        if (!ret)
            iwuqp->qp.rd_fence_rate = resp.rd_fence_rate;
        if (ret || !resp.push_valid)
            return ret;

        if (iwuqp->qp.push_wqe)
            return ret;

        offset = resp.push_wqe_mmap_key;
        map = irdma_mmap(qp->context->cmd_fd, offset);
        if (map == MAP_FAILED)
            return ret;

        iwuqp->qp.push_wqe = map;

        offset = resp.push_db_mmap_key;
        map = irdma_mmap(qp->context->cmd_fd, offset);
        if (map == MAP_FAILED) {
            irdma_munmap(iwuqp->qp.push_wqe);
            iwuqp->qp.push_wqe = NULL;
            printf("failed to map push page, errno %d\n", errno);
            return ret;
        }
        iwuqp->qp.push_wqe += resp.push_offset;
        iwuqp->qp.push_db = map + resp.push_offset;

        return ret;
    } else {
        int ret;

        ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd));
        if (ret)
            return ret;
        if (attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_ERR)
            pthread_create(&iwuqp->flush_thread, NULL, irdma_flush_thread, iwuqp);
        return 0;
    }
}
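
/*
 * Illustrative usage sketch (not part of the provider, compiled out):
 * moving a QP to the error state through irdma_umodify_qp(), which (on the
 * non-push path) starts irdma_flush_thread() so outstanding WRs surface as
 * flushed completions, then reaping them with a normal poll.
 */
#if 0
static void
example_flush_qp(struct ibv_qp *qp, struct ibv_cq *cq)
{
    struct ibv_qp_attr attr = { .qp_state = IBV_QPS_ERR };
    struct ibv_wc wc;

    if (ibv_modify_qp(qp, &attr, IBV_QP_STATE))
        return;
    /* flushed WRs complete with IBV_WC_WR_FLUSH_ERR */
    while (ibv_poll_cq(cq, 1, &wc) > 0)
        ;
}
#endif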

static void
irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush)
{
    struct irdma_umodify_qp_resp resp = {};
    struct irdma_modify_qp_cmd cmd_ex = {};
    struct ibv_qp_attr attr = {};

    attr.qp_state = IBV_QPS_ERR;
    cmd_ex.sq_flush = sq_flush;
    cmd_ex.rq_flush = rq_flush;

    ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE,
                         &cmd_ex.ibv_cmd,
                         sizeof(cmd_ex.ibv_cmd),
                         sizeof(cmd_ex), &resp.ibv_resp,
                         sizeof(resp.ibv_resp),
                         sizeof(resp));
}

/**
 * irdma_clean_cqes - clean cq entries for qp
 * @qp: qp for which completions are cleaned
 * @iwucq: cq to be cleaned
 */
static void
irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq)
{
    struct irdma_cq_uk *ukcq = &iwucq->cq;
    int ret;

    ret = pthread_spin_lock(&iwucq->lock);
    if (ret)
        return;

    irdma_uk_clean_cq(qp, ukcq);
    pthread_spin_unlock(&iwucq->lock);
}

/**
 * irdma_udestroy_qp - destroy qp
 * @qp: qp to destroy
 */
int
irdma_udestroy_qp(struct ibv_qp *qp)
{
    struct irdma_uqp *iwuqp;
    int ret;

    iwuqp = container_of(qp, struct irdma_uqp, ibv_qp);
    if (iwuqp->flush_thread) {
        pthread_cancel(iwuqp->flush_thread);
        pthread_join(iwuqp->flush_thread, NULL);
    }
    ret = pthread_spin_destroy(&iwuqp->lock);
    if (ret)
        goto err;

    ret = irdma_destroy_vmapped_qp(iwuqp);
    if (ret)
        goto err;

    /* Clean any pending completions from the cq(s) */
    if (iwuqp->send_cq)
        irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq);

    if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq)
        irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq);

    if (iwuqp->qp.sq_wrtrk_array)
        free(iwuqp->qp.sq_wrtrk_array);
    if (iwuqp->qp.rq_wrid_array)
        free(iwuqp->qp.rq_wrid_array);

    irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size);
    free(iwuqp->recv_sges);
    free(iwuqp);
    return 0;

err:
    printf("%s: failed to destroy QP, status %d\n",
           __func__, ret);
    return ret;
}

/**
 * irdma_copy_sg_list - copy sg list for qp
 * @sg_list: copied into sg_list
 * @sgl: copy from sgl
 * @num_sges: count of sg entries
 */
static void
irdma_copy_sg_list(struct irdma_sge *sg_list, struct ibv_sge *sgl,
                   int num_sges)
{
    int i;

    for (i = 0; i < num_sges; i++) {
        sg_list[i].tag_off = sgl[i].addr;
        sg_list[i].len = sgl[i].length;
        sg_list[i].stag = sgl[i].lkey;
    }
}

/**
 * calc_type2_mw_stag - calculate type 2 MW stag
 * @rkey: desired rkey of the MW
 * @mw_rkey: type2 memory window rkey
 *
 * compute type2 memory window stag by taking the lower 8 bits
 * of the desired rkey and leaving the upper 24 bits of mw->rkey unchanged
 */
static inline u32
calc_type2_mw_stag(u32 rkey, u32 mw_rkey)
{
    const u32 mask = 0xff;

    return (rkey & mask) | (mw_rkey & ~mask);
}
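
/*
 * Worked example for calc_type2_mw_stag() (illustrative, compiled out):
 * only the low 8 bits of the requested rkey are taken, the upper 24 bits
 * of the current MW rkey are preserved.
 */
#if 0
static void
example_type2_stag(void)
{
    u32 stag = calc_type2_mw_stag(0x12345678, 0xabcdef00);

    /* 0x78 from the requested rkey, 0xabcdef00 from the MW */
    printf("stag=0x%x\n", stag);  /* prints stag=0xabcdef78 */
}
#endif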
/**
 * irdma_upost_recv - post receive wr for user application
 * @ib_qp: qp to post the receive on
 * @ib_wr: work request for receive
 * @bad_wr: bad wr caused an error
 */
int
irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
		 struct ibv_recv_wr **bad_wr)
{
	struct irdma_post_rq_info post_recv = {};
	struct irdma_sge *sg_list;
	struct irdma_uqp *iwuqp;
	bool reflush = false;
	int err = 0;

	iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp);
	sg_list = iwuqp->recv_sges;

	err = pthread_spin_lock(&iwuqp->lock);
	if (err)
		return err;

	if (!IRDMA_RING_MORE_WORK(iwuqp->qp.rq_ring) &&
	    ib_qp->state == IBV_QPS_ERR)
		reflush = true;

	while (ib_wr) {
		if (ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt) {
			*bad_wr = ib_wr;
			err = EINVAL;
			goto error;
		}
		post_recv.num_sges = ib_wr->num_sge;
		post_recv.wr_id = ib_wr->wr_id;
		irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
		post_recv.sg_list = sg_list;
		err = irdma_uk_post_receive(&iwuqp->qp, &post_recv);
		if (err) {
			*bad_wr = ib_wr;
			goto error;
		}

		if (reflush)
			irdma_issue_flush(ib_qp, 0, 1);

		ib_wr = ib_wr->next;
	}
error:
	pthread_spin_unlock(&iwuqp->lock);

	return err;
}
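/*
 * Illustrative sketch (not compiled into this library): pre-posting a receive
 * buffer through the standard verbs API, which dispatches to
 * irdma_upost_recv() for irdma devices.  QP and MR setup is assumed; the
 * wr_id value is arbitrary.
 */
#if 0
static int
example_post_recv(struct ibv_qp *qp, struct ibv_mr *mr, void *buf, uint32_t len)
{
	struct ibv_sge sge = {
		.addr = (uintptr_t)buf,
		.length = len,
		.lkey = mr->lkey,
	};
	struct ibv_recv_wr wr = {
		.wr_id = 2,
		.sg_list = &sge,
		.num_sge = 1,
	};
	struct ibv_recv_wr *bad_wr;

	return ibv_post_recv(qp, &wr, &bad_wr);
}
#endif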
/**
 * irdma_ucreate_ah - create address handle associated with a pd
 * @ibpd: pd for the address handle
 * @attr: attributes of address handle
 */
struct ibv_ah *
irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr)
{
	struct irdma_uah *ah;
	union ibv_gid sgid;
	struct irdma_ucreate_ah_resp resp = {};
	int err;

	err = ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index,
			    &sgid);
	if (err) {
		fprintf(stderr, "irdma: Error from ibv_query_gid.\n");
		errno = err;
		return NULL;
	}

	ah = calloc(1, sizeof(*ah));
	if (!ah)
		return NULL;

	err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp,
				sizeof(resp));
	if (err) {
		free(ah);
		errno = err;
		return NULL;
	}

	ah->ah_id = resp.ah_id;

	return &ah->ibv_ah;
}

/**
 * irdma_udestroy_ah - destroy the address handle
 * @ibah: address handle
 */
int
irdma_udestroy_ah(struct ibv_ah *ibah)
{
	struct irdma_uah *ah;
	int ret;

	ah = container_of(ibah, struct irdma_uah, ibv_ah);

	ret = ibv_cmd_destroy_ah(ibah);
	if (ret)
		return ret;

	free(ah);

	return 0;
}

/**
 * irdma_uattach_mcast - attach qp to multicast group
 * @qp: the queue pair
 * @gid: the global ID for the multicast group
 * @lid: the local ID
 */
int
irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
		    uint16_t lid)
{
	return ibv_cmd_attach_mcast(qp, gid, lid);
}

/**
 * irdma_udetach_mcast - detach qp from multicast group
 * @qp: the queue pair
 * @gid: the global ID for the multicast group
 * @lid: the local ID
 */
int
irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
		    uint16_t lid)
{
	return ibv_cmd_detach_mcast(qp, gid, lid);
}

/**
 * irdma_uresize_cq - resizes a cq
 * @cq: cq to resize
 * @cqe: the number of cqes of the new cq
 */
int
irdma_uresize_cq(struct ibv_cq *cq, int cqe)
{
	struct irdma_uvcontext *iwvctx;
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uresize_cq cmd = {};
	struct ibv_resize_cq_resp resp = {};
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	struct irdma_cq_buf *cq_buf = NULL;
	struct irdma_cqe *cq_base = NULL;
	struct verbs_mr new_mr = {};
	struct irdma_ucq *iwucq;
	size_t cq_size;
	u32 cq_pages;
	int cqe_needed;
	int ret = 0;
	bool cqe_64byte_ena;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;

	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
		return EOPNOTSUPP;

	if (cqe < uk_attrs->min_hw_cq_size || cqe > uk_attrs->max_hw_cq_size - 1)
		return EINVAL;

	cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? true : false;

	cqe_needed = get_cq_size(cqe, uk_attrs->hw_rev, cqe_64byte_ena);

	if (cqe_needed == iwucq->cq.cq_size)
		return 0;

	cq_size = get_cq_total_bytes(cqe_needed, cqe_64byte_ena);
	cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT;
	cq_base = irdma_alloc_hw_buf(cq_size);
	if (!cq_base)
		return ENOMEM;

	memset(cq_base, 0, cq_size);

	cq_buf = malloc(sizeof(*cq_buf));
	if (!cq_buf) {
		ret = ENOMEM;
		goto err_buf;
	}

	new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
	reg_mr_cmd.cq_pages = cq_pages;

	ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
			     (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
			     &new_mr.ibv_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
			     &reg_mr_resp, sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		goto err_lock;

	cmd.user_cq_buffer = (__u64)((uintptr_t)cq_base);
	ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
				sizeof(cmd), &resp, sizeof(resp));
	if (ret)
		goto err_resize;

	memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
	cq_buf->buf_size = cq_size;
	cq_buf->vmr = iwucq->vmr;
	iwucq->vmr = new_mr;
	irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
	iwucq->verbs_cq.cq.cqe = cqe;
	LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list);

	pthread_spin_unlock(&iwucq->lock);

	return ret;

err_resize:
	pthread_spin_unlock(&iwucq->lock);
err_lock:
	ibv_cmd_dereg_mr(&new_mr.ibv_mr);
err_dereg_mr:
	free(cq_buf);
err_buf:
	fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret);
	irdma_free_hw_buf(cq_base, cq_size);
	return ret;
}
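/*
 * Illustrative sketch (not compiled into this library): growing a CQ from the
 * application side with the standard verbs call, which lands in
 * irdma_uresize_cq() for irdma devices.  The new depth shown here is an
 * arbitrary example value.
 */
#if 0
static int
example_grow_cq(struct ibv_cq *cq)
{
	/* ibv_resize_cq() returns 0 on success or an errno value. */
	return ibv_resize_cq(cq, 2 * cq->cqe);
}
#endif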