1 /*- 2 * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB 3 * 4 * Copyright (C) 2019 - 2023 Intel Corporation 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenFabrics.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 /*$FreeBSD$*/ 35 36 #include <config.h> 37 #include <stdlib.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <unistd.h> 41 #include <signal.h> 42 #include <errno.h> 43 #include <sys/param.h> 44 #include <sys/mman.h> 45 #include <netinet/in.h> 46 #include <sys/stat.h> 47 #include <fcntl.h> 48 #include <stdbool.h> 49 #include <infiniband/opcode.h> 50 51 #include "irdma_umain.h" 52 #include "abi.h" 53 54 static inline void 55 print_fw_ver(uint64_t fw_ver, char *str, size_t len) 56 { 57 uint16_t major, minor; 58 59 major = fw_ver >> 32 & 0xffff; 60 minor = fw_ver & 0xffff; 61 62 snprintf(str, len, "%d.%d", major, minor); 63 } 64 65 /** 66 * irdma_uquery_device_ex - query device attributes including extended properties 67 * @context: user context for the device 68 * @input: extensible input struct for ibv_query_device_ex verb 69 * @attr: extended device attribute struct 70 * @attr_size: size of extended device attribute struct 71 **/ 72 int 73 irdma_uquery_device_ex(struct ibv_context *context, 74 const struct ibv_query_device_ex_input *input, 75 struct ibv_device_attr_ex *attr, size_t attr_size) 76 { 77 struct irdma_query_device_ex cmd = {}; 78 struct irdma_query_device_ex_resp resp = {}; 79 uint64_t fw_ver; 80 int ret; 81 82 ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver, 83 &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), 84 &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); 85 if (ret) 86 return ret; 87 88 print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver)); 89 90 return 0; 91 } 92 93 /** 94 * irdma_uquery_device - call driver to query device for max resources 95 * @context: user context for the device 96 * @attr: where to save all the mx resources from the driver 97 **/ 98 int 99 irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr) 100 { 101 struct ibv_query_device cmd; 102 uint64_t fw_ver; 103 int ret; 104 105 ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd)); 106 if (ret) 107 return ret; 108 109 print_fw_ver(fw_ver, attr->fw_ver, 
		     sizeof(attr->fw_ver));

	return 0;
}

/**
 * irdma_uquery_port - get port attributes (msg size, lnk, mtu...)
 * @context: user context of the device
 * @port: port for the attributes
 * @attr: to return port attributes
 **/
int
irdma_uquery_port(struct ibv_context *context, uint8_t port,
		  struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;

	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
}

/**
 * irdma_ualloc_pd - allocates protection domain and returns pd ptr
 * @context: user context of the device
 **/
struct ibv_pd *
irdma_ualloc_pd(struct ibv_context *context)
{
	struct ibv_alloc_pd cmd;
	struct irdma_ualloc_pd_resp resp = {};
	struct irdma_upd *iwupd;
	int err;

	iwupd = calloc(1, sizeof(*iwupd));
	if (!iwupd)
		return NULL;

	err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd),
			       &resp.ibv_resp, sizeof(resp));
	if (err)
		goto err_free;

	iwupd->pd_id = resp.pd_id;

	return &iwupd->ibv_pd;

err_free:
	free(iwupd);
	errno = err;
	return NULL;
}

/**
 * irdma_ufree_pd - free pd resources
 * @pd: pd to free resources
 */
int
irdma_ufree_pd(struct ibv_pd *pd)
{
	struct irdma_uvcontext *iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx);
	struct irdma_upd *iwupd;
	int ret;

	iwupd = container_of(pd, struct irdma_upd, ibv_pd);
	ret = ibv_cmd_dealloc_pd(pd);
	if (ret)
		return ret;

	free(iwupd);

	return 0;
}

/**
 * irdma_ureg_mr - register user memory region
 * @pd: pd for the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
struct ibv_mr *
irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
	      int access)
{
	struct verbs_mr *vmr;
	struct irdma_ureg_mr cmd = {};
	struct ibv_reg_mr_resp resp;
	int err;

	vmr = malloc(sizeof(*vmr));
	if (!vmr)
		return NULL;

	cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
	err = ibv_cmd_reg_mr(pd, addr, length,
			     (uintptr_t)addr, access, &vmr->ibv_mr, &cmd.ibv_cmd,
			     sizeof(cmd), &resp, sizeof(resp));
	if (err) {
		free(vmr);
		errno = err;
		return NULL;
	}

	return &vmr->ibv_mr;
}

/**
 * irdma_urereg_mr - re-register memory region
 * @vmr: mr that was allocated
 * @flags: bit mask to indicate which of the attr's of the MR are modified
 * @pd: pd of the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
int
irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
		void *addr, size_t length, int access)
{
	struct irdma_urereg_mr cmd = {};
	struct ibv_rereg_mr_resp resp;

	cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
	return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr,
				access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp,
				sizeof(resp));
}

/**
 * irdma_udereg_mr - deregister memory region
 * @mr: mr that was allocated
 */
int
irdma_udereg_mr(struct ibv_mr *mr)
{
	struct verbs_mr *vmr;
	int ret;

	vmr = container_of(mr, struct verbs_mr, ibv_mr);

	ret = ibv_cmd_dereg_mr(mr);
	if (ret)
		return ret;

	return 0;
}

/**
 * irdma_ualloc_mw - allocate memory window
 * @pd: protection domain
 *
@type: memory window type 256 */ 257 struct ibv_mw * 258 irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) 259 { 260 struct ibv_mw *mw; 261 struct ibv_alloc_mw cmd; 262 struct ibv_alloc_mw_resp resp; 263 int err; 264 265 mw = calloc(1, sizeof(*mw)); 266 if (!mw) 267 return NULL; 268 269 err = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, 270 sizeof(resp)); 271 if (err) { 272 printf("%s: Failed to alloc memory window\n", 273 __func__); 274 free(mw); 275 errno = err; 276 return NULL; 277 } 278 279 return mw; 280 } 281 282 /** 283 * irdma_ubind_mw - bind a memory window 284 * @qp: qp to post WR 285 * @mw: memory window to bind 286 * @mw_bind: bind info 287 */ 288 int 289 irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, 290 struct ibv_mw_bind *mw_bind) 291 { 292 struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; 293 struct verbs_mr *vmr; 294 295 struct ibv_send_wr wr = {}; 296 struct ibv_send_wr *bad_wr; 297 int err; 298 299 if (!bind_info->mr && (bind_info->addr || bind_info->length)) 300 return EINVAL; 301 302 if (bind_info->mr) { 303 vmr = verbs_get_mr(bind_info->mr); 304 if (vmr->mr_type != IBV_MR_TYPE_MR) 305 return ENOTSUP; 306 307 if (vmr->access & IBV_ACCESS_ZERO_BASED) 308 return EINVAL; 309 310 if (mw->pd != bind_info->mr->pd) 311 return EPERM; 312 } 313 314 wr.opcode = IBV_WR_BIND_MW; 315 wr.bind_mw.bind_info = mw_bind->bind_info; 316 wr.bind_mw.mw = mw; 317 wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey); 318 319 wr.wr_id = mw_bind->wr_id; 320 wr.send_flags = mw_bind->send_flags; 321 322 err = irdma_upost_send(qp, &wr, &bad_wr); 323 if (!err) 324 mw->rkey = wr.bind_mw.rkey; 325 326 return err; 327 } 328 329 /** 330 * irdma_udealloc_mw - deallocate memory window 331 * @mw: memory window to dealloc 332 */ 333 int 334 irdma_udealloc_mw(struct ibv_mw *mw) 335 { 336 int ret; 337 struct ibv_dealloc_mw cmd; 338 339 ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd)); 340 if (ret) 341 return ret; 342 free(mw); 343 344 return 0; 345 } 346 347 static void * 348 irdma_alloc_hw_buf(size_t size) 349 { 350 void *buf; 351 352 buf = memalign(IRDMA_HW_PAGE_SIZE, size); 353 354 if (!buf) 355 return NULL; 356 if (ibv_dontfork_range(buf, size)) { 357 free(buf); 358 return NULL; 359 } 360 361 return buf; 362 } 363 364 static void 365 irdma_free_hw_buf(void *buf, size_t size) 366 { 367 ibv_dofork_range(buf, size); 368 free(buf); 369 } 370 371 /** 372 * get_cq_size - returns actual cqe needed by HW 373 * @ncqe: minimum cqes requested by application 374 * @hw_rev: HW generation 375 * @cqe_64byte_ena: enable 64byte cqe 376 */ 377 static inline int 378 get_cq_size(int ncqe, u8 hw_rev, bool cqe_64byte_ena) 379 { 380 ncqe++; 381 382 /* Completions with immediate require 1 extra entry */ 383 if (!cqe_64byte_ena && hw_rev > IRDMA_GEN_1) 384 ncqe *= 2; 385 386 if (ncqe < IRDMA_U_MINCQ_SIZE) 387 ncqe = IRDMA_U_MINCQ_SIZE; 388 389 return ncqe; 390 } 391 392 static inline size_t get_cq_total_bytes(u32 cq_size, bool cqe_64byte_ena){ 393 if (cqe_64byte_ena) 394 return roundup(cq_size * sizeof(struct irdma_extended_cqe), IRDMA_HW_PAGE_SIZE); 395 else 396 return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE); 397 } 398 399 /** 400 * ucreate_cq - irdma util function to create a CQ 401 * @context: ibv context 402 * @attr_ex: CQ init attributes 403 * @ext_cq: flag to create an extendable or normal CQ 404 */ 405 static struct ibv_cq_ex * 406 ucreate_cq(struct ibv_context *context, 407 struct ibv_cq_init_attr_ex *attr_ex, 408 bool ext_cq) 409 { 410 struct irdma_cq_uk_init_info info = {}; 
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct irdma_ucreate_cq_ex cmd = {};
	struct irdma_ucreate_cq_ex_resp resp = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	struct irdma_ureg_mr reg_mr_shadow_cmd = {};
	struct ibv_reg_mr_resp reg_mr_shadow_resp = {};
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uvcontext *iwvctx;
	struct irdma_ucq *iwucq;
	size_t total_size;
	u32 cq_pages;
	int ret, ncqe;
	u8 hw_rev;
	bool cqe_64byte_ena;

	iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;
	hw_rev = uk_attrs->hw_rev;

	if (ext_cq) {
		u32 supported_flags = IRDMA_STANDARD_WC_FLAGS_EX;

		if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) {
			errno = EOPNOTSUPP;
			return NULL;
		}
	}

	if (attr_ex->cqe < uk_attrs->min_hw_cq_size || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) {
		errno = EINVAL;
		return NULL;
	}

	/* save the cqe requested by application */
	ncqe = attr_ex->cqe;

	iwucq = calloc(1, sizeof(*iwucq));
	if (!iwucq)
		return NULL;

	ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE);
	if (ret) {
		free(iwucq);
		errno = ret;
		return NULL;
	}

	cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? true : false;
	info.cq_size = get_cq_size(attr_ex->cqe, hw_rev, cqe_64byte_ena);
	iwucq->comp_vector = attr_ex->comp_vector;
	LIST_INIT(&iwucq->resize_list);
	LIST_INIT(&iwucq->cmpl_generated);
	total_size = get_cq_total_bytes(info.cq_size, cqe_64byte_ena);
	cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT;

	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
		total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE;

	iwucq->buf_size = total_size;
	info.cq_base = irdma_alloc_hw_buf(total_size);
	if (!info.cq_base) {
		ret = ENOMEM;
		goto err_cq_base;
	}

	memset(info.cq_base, 0, total_size);
	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
	reg_mr_cmd.cq_pages = cq_pages;

	ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base,
			     total_size, (uintptr_t)info.cq_base,
			     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr,
			     &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
			     &reg_mr_resp, sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

	if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
		info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE);
		if (!info.shadow_area) {
			ret = ENOMEM;
			goto err_alloc_shadow;
		}

		memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE);
		reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
		reg_mr_shadow_cmd.cq_pages = 1;

		ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area,
				     IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
				     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr,
				     &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
				     &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
		if (ret) {
			irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
			goto err_alloc_shadow;
		}

		iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

	} else {
		info.shadow_area = (__le64 *)((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT));
	}

	attr_ex->cqe = info.cq_size;
	cmd.user_cq_buf = (__u64)((uintptr_t)info.cq_base);
	cmd.user_shadow_area = (__u64)((uintptr_t)info.shadow_area);
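	/*
	 * Illustrative sizing sketch, not part of the driver logic: assuming
	 * sizeof(struct irdma_cqe) == 32 and a GEN_2 device without 64-byte
	 * CQEs, an application request of attr_ex->cqe = 256 becomes
	 * get_cq_size(256, ...) = (256 + 1) * 2 = 514 entries, and
	 * get_cq_total_bytes(514, false) = roundup(514 * 32, 4096) = 20480
	 * bytes (5 HW pages) for the CQ ring registered above.
	 */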
520 521 ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex, 522 &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp, 523 sizeof(resp.ibv_resp), sizeof(resp)); 524 attr_ex->cqe = ncqe; 525 if (ret) 526 goto err_create_cq; 527 528 if (ext_cq) 529 irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex); 530 info.cq_id = resp.cq_id; 531 /* Do not report the CQE's reserved for immediate and burned by HW */ 532 iwucq->verbs_cq.cq.cqe = ncqe; 533 if (cqe_64byte_ena) 534 info.avoid_mem_cflct = true; 535 info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET); 536 irdma_uk_cq_init(&iwucq->cq, &info); 537 return &iwucq->verbs_cq.cq_ex; 538 539 err_create_cq: 540 if (iwucq->vmr_shadow_area.ibv_mr.handle) { 541 ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); 542 irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); 543 } 544 err_alloc_shadow: 545 ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); 546 err_dereg_mr: 547 irdma_free_hw_buf(info.cq_base, total_size); 548 err_cq_base: 549 printf("%s: failed to initialize CQ\n", __func__); 550 pthread_spin_destroy(&iwucq->lock); 551 552 free(iwucq); 553 554 errno = ret; 555 return NULL; 556 } 557 558 struct ibv_cq * 559 irdma_ucreate_cq(struct ibv_context *context, int cqe, 560 struct ibv_comp_channel *channel, 561 int comp_vector) 562 { 563 struct ibv_cq_init_attr_ex attr_ex = { 564 .cqe = cqe, 565 .channel = channel, 566 .comp_vector = comp_vector, 567 }; 568 struct ibv_cq_ex *ibvcq_ex; 569 570 ibvcq_ex = ucreate_cq(context, &attr_ex, false); 571 572 return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL; 573 } 574 575 struct ibv_cq_ex * 576 irdma_ucreate_cq_ex(struct ibv_context *context, 577 struct ibv_cq_init_attr_ex *attr_ex) 578 { 579 return ucreate_cq(context, attr_ex, true); 580 } 581 582 /** 583 * irdma_free_cq_buf - free memory for cq buffer 584 * @cq_buf: cq buf to free 585 */ 586 static void 587 irdma_free_cq_buf(struct irdma_cq_buf *cq_buf) 588 { 589 ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr); 590 irdma_free_hw_buf(cq_buf->cq.cq_base, cq_buf->buf_size); 591 free(cq_buf); 592 } 593 594 /** 595 * irdma_process_resize_list - process the cq list to remove buffers 596 * @iwucq: cq which owns the list 597 * @lcqe_buf: cq buf where the last cqe is found 598 */ 599 static int 600 irdma_process_resize_list(struct irdma_ucq *iwucq, 601 struct irdma_cq_buf *lcqe_buf) 602 { 603 struct irdma_cq_buf *cq_buf, *next; 604 int cq_cnt = 0; 605 606 LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { 607 if (cq_buf == lcqe_buf) 608 return cq_cnt; 609 610 LIST_REMOVE(cq_buf, list); 611 irdma_free_cq_buf(cq_buf); 612 cq_cnt++; 613 } 614 615 return cq_cnt; 616 } 617 618 static void 619 irdma_remove_cmpls_list(struct irdma_ucq *iwucq) 620 { 621 struct irdma_cmpl_gen *cmpl_node, *next; 622 623 LIST_FOREACH_SAFE(cmpl_node, &iwucq->cmpl_generated, list, next) { 624 LIST_REMOVE(cmpl_node, list); 625 free(cmpl_node); 626 } 627 } 628 629 static int 630 irdma_generated_cmpls(struct irdma_ucq *iwucq, struct irdma_cq_poll_info *cq_poll_info) 631 { 632 struct irdma_cmpl_gen *cmpl; 633 634 if (!iwucq || LIST_EMPTY(&iwucq->cmpl_generated)) 635 return ENOENT; 636 cmpl = LIST_FIRST(&iwucq->cmpl_generated); 637 LIST_REMOVE(cmpl, list); 638 memcpy(cq_poll_info, &cmpl->cpi, sizeof(*cq_poll_info)); 639 640 free(cmpl); 641 642 return 0; 643 } 644 645 /** 646 * irdma_set_cpi_common_values - fill in values for polling info struct 647 * @cpi: resulting structure of cq_poll_info type 648 * @qp: QPair 649 * @qp_num: id of the QP 650 */ 651 static void 652 
irdma_set_cpi_common_values(struct irdma_cq_poll_info *cpi, 653 struct irdma_qp_uk *qp, __u32 qp_num) 654 { 655 cpi->comp_status = IRDMA_COMPL_STATUS_FLUSHED; 656 cpi->error = 1; 657 cpi->major_err = IRDMA_FLUSH_MAJOR_ERR; 658 cpi->minor_err = FLUSH_GENERAL_ERR; 659 cpi->qp_handle = (irdma_qp_handle) (uintptr_t)qp; 660 cpi->qp_id = qp_num; 661 } 662 663 static bool 664 irdma_cq_empty(struct irdma_ucq *iwucq) 665 { 666 struct irdma_cq_uk *ukcq; 667 __u64 qword3; 668 __le64 *cqe; 669 __u8 polarity; 670 671 ukcq = &iwucq->cq; 672 cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq); 673 get_64bit_val(cqe, 24, &qword3); 674 polarity = (__u8) FIELD_GET(IRDMA_CQ_VALID, qword3); 675 676 return polarity != ukcq->polarity; 677 } 678 679 /** 680 * irdma_generate_flush_completions - generate completion from WRs 681 * @iwuqp: pointer to QP 682 */ 683 static void 684 irdma_generate_flush_completions(struct irdma_uqp *iwuqp) 685 { 686 struct irdma_qp_uk *qp = &iwuqp->qp; 687 struct irdma_ring *sq_ring = &qp->sq_ring; 688 struct irdma_ring *rq_ring = &qp->rq_ring; 689 struct irdma_cmpl_gen *cmpl; 690 __le64 *sw_wqe; 691 __u64 wqe_qword; 692 __u32 wqe_idx; 693 694 if (pthread_spin_lock(&iwuqp->send_cq->lock)) 695 return; 696 if (irdma_cq_empty(iwuqp->send_cq)) { 697 while (IRDMA_RING_MORE_WORK(*sq_ring)) { 698 cmpl = malloc(sizeof(*cmpl)); 699 if (!cmpl) { 700 pthread_spin_unlock(&iwuqp->send_cq->lock); 701 return; 702 } 703 704 wqe_idx = sq_ring->tail; 705 irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); 706 cmpl->cpi.wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; 707 sw_wqe = qp->sq_base[wqe_idx].elem; 708 get_64bit_val(sw_wqe, 24, &wqe_qword); 709 cmpl->cpi.op_type = (__u8) FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword); 710 /* remove the SQ WR by moving SQ tail */ 711 IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); 712 LIST_INSERT_HEAD(&iwuqp->send_cq->cmpl_generated, cmpl, list); 713 } 714 } 715 pthread_spin_unlock(&iwuqp->send_cq->lock); 716 if (pthread_spin_lock(&iwuqp->recv_cq->lock)) 717 return; 718 if (irdma_cq_empty(iwuqp->recv_cq)) { 719 while (IRDMA_RING_MORE_WORK(*rq_ring)) { 720 cmpl = malloc(sizeof(*cmpl)); 721 if (!cmpl) { 722 pthread_spin_unlock(&iwuqp->recv_cq->lock); 723 return; 724 } 725 726 wqe_idx = rq_ring->tail; 727 irdma_set_cpi_common_values(&cmpl->cpi, qp, qp->qp_id); 728 cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; 729 cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; 730 /* remove the RQ WR by moving RQ tail */ 731 IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); 732 LIST_INSERT_HEAD(&iwuqp->recv_cq->cmpl_generated, cmpl, list); 733 } 734 } 735 pthread_spin_unlock(&iwuqp->recv_cq->lock); 736 } 737 738 void * 739 irdma_flush_thread(void *arg) 740 { 741 __u8 i = 5; 742 struct irdma_uqp *iwuqp = arg; 743 744 while (--i) { 745 if (pthread_spin_lock(&iwuqp->lock)) 746 break; 747 irdma_generate_flush_completions(arg); 748 pthread_spin_unlock(&iwuqp->lock); 749 sleep(1); 750 } 751 pthread_exit(NULL); 752 } 753 754 /** 755 * irdma_udestroy_cq - destroys cq 756 * @cq: ptr to cq to be destroyed 757 */ 758 int 759 irdma_udestroy_cq(struct ibv_cq *cq) 760 { 761 struct irdma_uk_attrs *uk_attrs; 762 struct irdma_uvcontext *iwvctx; 763 struct irdma_ucq *iwucq; 764 int ret; 765 766 iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); 767 iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); 768 uk_attrs = &iwvctx->uk_attrs; 769 770 ret = pthread_spin_destroy(&iwucq->lock); 771 if (ret) 772 goto err; 773 774 if (!LIST_EMPTY(&iwucq->cmpl_generated)) 775 
irdma_remove_cmpls_list(iwucq); 776 irdma_process_resize_list(iwucq, NULL); 777 ret = ibv_cmd_destroy_cq(cq); 778 if (ret) 779 goto err; 780 781 ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); 782 irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size); 783 784 if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) { 785 ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); 786 irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); 787 } 788 free(iwucq); 789 return 0; 790 791 err: 792 return ret; 793 } 794 795 static enum ibv_wc_status 796 irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode) 797 { 798 switch (opcode) { 799 case FLUSH_PROT_ERR: 800 return IBV_WC_LOC_PROT_ERR; 801 case FLUSH_REM_ACCESS_ERR: 802 return IBV_WC_REM_ACCESS_ERR; 803 case FLUSH_LOC_QP_OP_ERR: 804 return IBV_WC_LOC_QP_OP_ERR; 805 case FLUSH_REM_OP_ERR: 806 return IBV_WC_REM_OP_ERR; 807 case FLUSH_LOC_LEN_ERR: 808 return IBV_WC_LOC_LEN_ERR; 809 case FLUSH_GENERAL_ERR: 810 return IBV_WC_WR_FLUSH_ERR; 811 case FLUSH_MW_BIND_ERR: 812 return IBV_WC_MW_BIND_ERR; 813 case FLUSH_REM_INV_REQ_ERR: 814 return IBV_WC_REM_INV_REQ_ERR; 815 case FLUSH_RETRY_EXC_ERR: 816 return IBV_WC_RETRY_EXC_ERR; 817 case FLUSH_FATAL_ERR: 818 default: 819 return IBV_WC_FATAL_ERR; 820 } 821 } 822 823 static inline void 824 set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry) 825 { 826 switch (cur_cqe->op_type) { 827 case IRDMA_OP_TYPE_RDMA_WRITE: 828 case IRDMA_OP_TYPE_RDMA_WRITE_SOL: 829 entry->opcode = IBV_WC_RDMA_WRITE; 830 break; 831 case IRDMA_OP_TYPE_RDMA_READ: 832 entry->opcode = IBV_WC_RDMA_READ; 833 break; 834 case IRDMA_OP_TYPE_SEND_SOL: 835 case IRDMA_OP_TYPE_SEND_SOL_INV: 836 case IRDMA_OP_TYPE_SEND_INV: 837 case IRDMA_OP_TYPE_SEND: 838 entry->opcode = IBV_WC_SEND; 839 break; 840 case IRDMA_OP_TYPE_BIND_MW: 841 entry->opcode = IBV_WC_BIND_MW; 842 break; 843 case IRDMA_OP_TYPE_INV_STAG: 844 entry->opcode = IBV_WC_LOCAL_INV; 845 break; 846 default: 847 entry->status = IBV_WC_GENERAL_ERR; 848 printf("%s: Invalid opcode = %d in CQE\n", 849 __func__, cur_cqe->op_type); 850 } 851 } 852 853 static inline void 854 set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe, 855 struct ibv_wc *entry, bool send_imm_support) 856 { 857 if (!send_imm_support) { 858 entry->opcode = cur_cqe->imm_valid ? IBV_WC_RECV_RDMA_WITH_IMM : 859 IBV_WC_RECV; 860 return; 861 } 862 switch (cur_cqe->op_type) { 863 case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: 864 case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: 865 entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; 866 break; 867 default: 868 entry->opcode = IBV_WC_RECV; 869 } 870 } 871 872 /** 873 * irdma_process_cqe_ext - process current cqe for extended CQ 874 * @cur_cqe - current cqe info 875 */ 876 static void 877 irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe) 878 { 879 struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe); 880 struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; 881 882 ibvcq_ex->wr_id = cur_cqe->wr_id; 883 if (cur_cqe->error) 884 ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? 
885 irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; 886 else 887 ibvcq_ex->status = IBV_WC_SUCCESS; 888 } 889 890 /** 891 * irdma_process_cqe - process current cqe info 892 * @entry - ibv_wc object to fill in for non-extended CQ 893 * @cur_cqe - current cqe info 894 */ 895 static void 896 irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe) 897 { 898 struct irdma_qp_uk *qp; 899 struct ibv_qp *ib_qp; 900 901 entry->wc_flags = 0; 902 entry->wr_id = cur_cqe->wr_id; 903 entry->qp_num = cur_cqe->qp_id; 904 qp = cur_cqe->qp_handle; 905 ib_qp = qp->back_qp; 906 907 if (cur_cqe->error) { 908 entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? 909 irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; 910 entry->vendor_err = cur_cqe->major_err << 16 | 911 cur_cqe->minor_err; 912 } else { 913 entry->status = IBV_WC_SUCCESS; 914 } 915 916 if (cur_cqe->imm_valid) { 917 entry->imm_data = htonl(cur_cqe->imm_data); 918 entry->wc_flags |= IBV_WC_WITH_IMM; 919 } 920 921 if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) { 922 set_ib_wc_op_sq(cur_cqe, entry); 923 } else { 924 set_ib_wc_op_rq(cur_cqe, entry, 925 qp->qp_caps & IRDMA_SEND_WITH_IMM ? 926 true : false); 927 if (ib_qp->qp_type != IBV_QPT_UD && 928 cur_cqe->stag_invalid_set) { 929 entry->invalidated_rkey = cur_cqe->inv_stag; 930 entry->wc_flags |= IBV_WC_WITH_INV; 931 } 932 } 933 934 if (ib_qp->qp_type == IBV_QPT_UD) { 935 entry->src_qp = cur_cqe->ud_src_qpn; 936 entry->wc_flags |= IBV_WC_GRH; 937 } else { 938 entry->src_qp = cur_cqe->qp_id; 939 } 940 entry->byte_len = cur_cqe->bytes_xfered; 941 } 942 943 /** 944 * irdma_poll_one - poll one entry of the CQ 945 * @ukcq: ukcq to poll 946 * @cur_cqe: current CQE info to be filled in 947 * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ 948 * 949 * Returns the internal irdma device error code or 0 on success 950 */ 951 static int 952 irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe, 953 struct ibv_wc *entry) 954 { 955 int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe); 956 957 if (ret) 958 return ret; 959 960 if (!entry) 961 irdma_process_cqe_ext(cur_cqe); 962 else 963 irdma_process_cqe(entry, cur_cqe); 964 965 return 0; 966 } 967 968 /** 969 * __irdma_upoll_cq - irdma util function to poll device CQ 970 * @iwucq: irdma cq to poll 971 * @num_entries: max cq entries to poll 972 * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ 973 * 974 * Returns non-negative value equal to the number of completions 975 * found. On failure, EINVAL 976 */ 977 static int 978 __irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries, 979 struct ibv_wc *entry) 980 { 981 struct irdma_cq_buf *cq_buf, *next; 982 struct irdma_cq_buf *last_buf = NULL; 983 struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe; 984 bool cq_new_cqe = false; 985 int resized_bufs = 0; 986 int npolled = 0; 987 int ret; 988 989 /* go through the list of previously resized CQ buffers */ 990 LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { 991 while (npolled < num_entries) { 992 ret = irdma_poll_one(&cq_buf->cq, cur_cqe, 993 entry ? entry + npolled : NULL); 994 if (!ret) { 995 ++npolled; 996 cq_new_cqe = true; 997 continue; 998 } 999 if (ret == ENOENT) 1000 break; 1001 /* QP using the CQ is destroyed. 
			   Skip reporting this CQE */
			if (ret == EFAULT) {
				cq_new_cqe = true;
				continue;
			}
			goto error;
		}

		/* save the resized CQ buffer which received the last cqe */
		if (cq_new_cqe)
			last_buf = cq_buf;
		cq_new_cqe = false;
	}

	/* check the current CQ for new cqes */
	while (npolled < num_entries) {
		ret = irdma_poll_one(&iwucq->cq, cur_cqe,
				     entry ? entry + npolled : NULL);
		if (ret == ENOENT) {
			ret = irdma_generated_cmpls(iwucq, cur_cqe);
			if (!ret) {
				if (entry)
					irdma_process_cqe(entry + npolled, cur_cqe);
				else
					irdma_process_cqe_ext(cur_cqe);
			}
		}
		if (!ret) {
			++npolled;
			cq_new_cqe = true;
			continue;
		}
		if (ret == ENOENT)
			break;
		/* QP using the CQ is destroyed. Skip reporting this CQE */
		if (ret == EFAULT) {
			cq_new_cqe = true;
			continue;
		}
		goto error;
	}

	if (cq_new_cqe)
		/* all previous CQ resizes are complete */
		resized_bufs = irdma_process_resize_list(iwucq, NULL);
	else if (last_buf)
		/* only CQ resizes up to the last_buf are complete */
		resized_bufs = irdma_process_resize_list(iwucq, last_buf);
	if (resized_bufs)
		/* report to the HW the number of complete CQ resizes */
		irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs);

	return npolled;

error:
	printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret);

	return EINVAL;
}

/**
 * irdma_upoll_cq - verb API callback to poll device CQ
 * @cq: ibv_cq to poll
 * @num_entries: max cq entries to poll
 * @entry: pointer to array of ibv_wc objects to be filled in for each completion
 *
 * Returns non-negative value equal to the number of completions
 * found and a negative error code on failure
 */
int
irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry)
{
	struct irdma_ucq *iwucq;
	int ret;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		return -ret;

	ret = __irdma_upoll_cq(iwucq, num_entries, entry);

	pthread_spin_unlock(&iwucq->lock);

	return ret;
}

/**
 * irdma_start_poll - verb_ex API callback to poll batch of WC's
 * @ibvcq_ex: ibv extended CQ
 * @attr: attributes (not used)
 *
 * Start polling a batch of work completions. Returns 0 on success, ENOENT
 * when no completions are available on the CQ, and an error code on other
 * errors.
 */
static int
irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr)
{
	struct irdma_ucq *iwucq;
	int ret;

	iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		return ret;

	ret = __irdma_upoll_cq(iwucq, 1, NULL);
	if (ret == 1)
		return 0;

	/* No Completions on CQ */
	if (!ret)
		ret = ENOENT;

	pthread_spin_unlock(&iwucq->lock);

	return ret;
}

/**
 * irdma_next_poll - verb_ex API callback to get next WC
 * @ibvcq_ex: ibv extended CQ
 *
 * Return 0 on success, ENOENT when no completions are available on CQ.
1125 * And an error code on errors 1126 */ 1127 static int 1128 irdma_next_poll(struct ibv_cq_ex *ibvcq_ex) 1129 { 1130 struct irdma_ucq *iwucq; 1131 int ret; 1132 1133 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1134 ret = __irdma_upoll_cq(iwucq, 1, NULL); 1135 if (ret == 1) 1136 return 0; 1137 1138 /* No Completions on CQ */ 1139 if (!ret) 1140 ret = ENOENT; 1141 1142 return ret; 1143 } 1144 1145 /** 1146 * irdma_end_poll - verb_ex API callback to end polling of WC's 1147 * @ibvcq_ex: ibv extended CQ 1148 */ 1149 static void 1150 irdma_end_poll(struct ibv_cq_ex *ibvcq_ex) 1151 { 1152 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1153 verbs_cq.cq_ex); 1154 1155 pthread_spin_unlock(&iwucq->lock); 1156 } 1157 1158 static enum ibv_wc_opcode 1159 irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) 1160 { 1161 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1162 verbs_cq.cq_ex); 1163 1164 switch (iwucq->cur_cqe.op_type) { 1165 case IRDMA_OP_TYPE_RDMA_WRITE: 1166 case IRDMA_OP_TYPE_RDMA_WRITE_SOL: 1167 return IBV_WC_RDMA_WRITE; 1168 case IRDMA_OP_TYPE_RDMA_READ: 1169 return IBV_WC_RDMA_READ; 1170 case IRDMA_OP_TYPE_SEND_SOL: 1171 case IRDMA_OP_TYPE_SEND_SOL_INV: 1172 case IRDMA_OP_TYPE_SEND_INV: 1173 case IRDMA_OP_TYPE_SEND: 1174 return IBV_WC_SEND; 1175 case IRDMA_OP_TYPE_BIND_MW: 1176 return IBV_WC_BIND_MW; 1177 case IRDMA_OP_TYPE_REC: 1178 return IBV_WC_RECV; 1179 case IRDMA_OP_TYPE_REC_IMM: 1180 return IBV_WC_RECV_RDMA_WITH_IMM; 1181 case IRDMA_OP_TYPE_INV_STAG: 1182 return IBV_WC_LOCAL_INV; 1183 } 1184 1185 printf("%s: Invalid opcode = %d in CQE\n", __func__, 1186 iwucq->cur_cqe.op_type); 1187 1188 return 0; 1189 } 1190 1191 static uint32_t irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex){ 1192 struct irdma_cq_poll_info *cur_cqe; 1193 struct irdma_ucq *iwucq; 1194 1195 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1196 cur_cqe = &iwucq->cur_cqe; 1197 1198 return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0; 1199 } 1200 1201 static int 1202 irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) 1203 { 1204 struct irdma_cq_poll_info *cur_cqe; 1205 struct irdma_ucq *iwucq; 1206 struct irdma_qp_uk *qp; 1207 struct ibv_qp *ib_qp; 1208 int wc_flags = 0; 1209 1210 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1211 cur_cqe = &iwucq->cur_cqe; 1212 qp = cur_cqe->qp_handle; 1213 ib_qp = qp->back_qp; 1214 1215 if (cur_cqe->imm_valid) 1216 wc_flags |= IBV_WC_WITH_IMM; 1217 1218 if (ib_qp->qp_type == IBV_QPT_UD) { 1219 wc_flags |= IBV_WC_GRH; 1220 } else { 1221 if (cur_cqe->stag_invalid_set) { 1222 switch (cur_cqe->op_type) { 1223 case IRDMA_OP_TYPE_REC: 1224 wc_flags |= IBV_WC_WITH_INV; 1225 break; 1226 case IRDMA_OP_TYPE_REC_IMM: 1227 wc_flags |= IBV_WC_WITH_INV; 1228 break; 1229 } 1230 } 1231 } 1232 1233 return wc_flags; 1234 } 1235 1236 static uint32_t irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex){ 1237 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1238 verbs_cq.cq_ex); 1239 1240 return iwucq->cur_cqe.bytes_xfered; 1241 } 1242 1243 static __be32 irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex){ 1244 struct irdma_cq_poll_info *cur_cqe; 1245 struct irdma_ucq *iwucq; 1246 1247 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1248 cur_cqe = &iwucq->cur_cqe; 1249 1250 return cur_cqe->imm_valid ? 
htonl(cur_cqe->imm_data) : 0; 1251 } 1252 1253 static uint32_t irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex){ 1254 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1255 verbs_cq.cq_ex); 1256 1257 return iwucq->cur_cqe.qp_id; 1258 } 1259 1260 static uint32_t irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex){ 1261 struct irdma_cq_poll_info *cur_cqe; 1262 struct irdma_ucq *iwucq; 1263 struct irdma_qp_uk *qp; 1264 struct ibv_qp *ib_qp; 1265 1266 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1267 cur_cqe = &iwucq->cur_cqe; 1268 qp = cur_cqe->qp_handle; 1269 ib_qp = qp->back_qp; 1270 1271 return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn : cur_cqe->qp_id; 1272 } 1273 1274 static uint8_t irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex){ 1275 return 0; 1276 } 1277 1278 void 1279 irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq, 1280 struct ibv_cq_init_attr_ex *attr_ex) 1281 { 1282 struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; 1283 1284 ibvcq_ex->start_poll = irdma_start_poll; 1285 ibvcq_ex->end_poll = irdma_end_poll; 1286 ibvcq_ex->next_poll = irdma_next_poll; 1287 1288 ibvcq_ex->read_opcode = irdma_wc_read_opcode; 1289 ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err; 1290 ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags; 1291 1292 if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) 1293 ibvcq_ex->read_byte_len = irdma_wc_read_byte_len; 1294 if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM) 1295 ibvcq_ex->read_imm_data = irdma_wc_read_imm_data; 1296 if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM) 1297 ibvcq_ex->read_qp_num = irdma_wc_read_qp_num; 1298 if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP) 1299 ibvcq_ex->read_src_qp = irdma_wc_read_src_qp; 1300 if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL) 1301 ibvcq_ex->read_sl = irdma_wc_read_sl; 1302 } 1303 1304 /** 1305 * irdma_arm_cq - arm of cq 1306 * @iwucq: cq to which arm 1307 * @cq_notify: notification params 1308 */ 1309 static void 1310 irdma_arm_cq(struct irdma_ucq *iwucq, 1311 enum irdma_cmpl_notify cq_notify) 1312 { 1313 iwucq->is_armed = true; 1314 iwucq->arm_sol = true; 1315 iwucq->skip_arm = false; 1316 iwucq->skip_sol = true; 1317 irdma_uk_cq_request_notification(&iwucq->cq, cq_notify); 1318 } 1319 1320 /** 1321 * irdma_uarm_cq - callback for arm of cq 1322 * @cq: cq to arm 1323 * @solicited: to get notify params 1324 */ 1325 int 1326 irdma_uarm_cq(struct ibv_cq *cq, int solicited) 1327 { 1328 struct irdma_ucq *iwucq; 1329 enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; 1330 int ret; 1331 1332 iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); 1333 if (solicited) 1334 cq_notify = IRDMA_CQ_COMPL_SOLICITED; 1335 1336 ret = pthread_spin_lock(&iwucq->lock); 1337 if (ret) 1338 return ret; 1339 1340 if (iwucq->is_armed) { 1341 if (iwucq->arm_sol && !solicited) { 1342 irdma_arm_cq(iwucq, cq_notify); 1343 } else { 1344 iwucq->skip_arm = true; 1345 iwucq->skip_sol = solicited ? 
			    true : false;
		}
	} else {
		irdma_arm_cq(iwucq, cq_notify);
	}

	pthread_spin_unlock(&iwucq->lock);

	return 0;
}

/**
 * irdma_cq_event - cq to do completion event
 * @cq: cq to arm
 */
void
irdma_cq_event(struct ibv_cq *cq)
{
	struct irdma_ucq *iwucq;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	if (pthread_spin_lock(&iwucq->lock))
		return;

	if (iwucq->skip_arm)
		irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT);
	else
		iwucq->is_armed = false;

	pthread_spin_unlock(&iwucq->lock);
}

void *
irdma_mmap(int fd, off_t offset)
{
	void *map;

	map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
		   fd, offset);
	if (map == MAP_FAILED)
		return map;

	if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) {
		munmap(map, IRDMA_HW_PAGE_SIZE);
		return MAP_FAILED;
	}

	return map;
}

void
irdma_munmap(void *map)
{
	ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE);
	munmap(map, IRDMA_HW_PAGE_SIZE);
}

/**
 * irdma_destroy_vmapped_qp - destroy resources for qp
 * @iwuqp: qp struct for resources
 */
static int
irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp)
{
	int ret;

	ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp);
	if (ret)
		return ret;

	if (iwuqp->qp.push_db)
		irdma_munmap(iwuqp->qp.push_db);
	if (iwuqp->qp.push_wqe)
		irdma_munmap(iwuqp->qp.push_wqe);

	ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);

	return 0;
}

/**
 * irdma_vmapped_qp - create resources for qp
 * @iwuqp: qp struct for resources
 * @pd: pd for the qp
 * @attr: attributes of qp passed
 * @info: uk info for initializing user level qp
 * @legacy_mode: true if the provider is running in legacy mode
 */
static int
irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd,
		 struct ibv_qp_init_attr *attr,
		 struct irdma_qp_uk_init_info *info,
		 bool legacy_mode)
{
	struct irdma_ucreate_qp cmd = {};
	size_t sqsize, rqsize, totalqpsize;
	struct irdma_ucreate_qp_resp resp = {};
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	int ret;

	sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
	rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
	totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE;
	info->sq = irdma_alloc_hw_buf(totalqpsize);
	iwuqp->buf_size = totalqpsize;

	if (!info->sq)
		return ENOMEM;

	memset(info->sq, 0, totalqpsize);
	info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE];
	info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem;

	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP;
	reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT;
	reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT;

	ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize,
			     (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE,
			     &iwuqp->vmr.ibv_mr, &reg_mr_cmd.ibv_cmd,
			     sizeof(reg_mr_cmd), &reg_mr_resp,
			     sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	cmd.user_wqe_bufs = (__u64)((uintptr_t)info->sq);
	cmd.user_compl_ctx = (__u64)(uintptr_t)&iwuqp->qp;
	ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd,
1475 sizeof(cmd), &resp.ibv_resp, 1476 sizeof(struct irdma_ucreate_qp_resp)); 1477 if (ret) 1478 goto err_qp; 1479 1480 info->sq_size = resp.actual_sq_size; 1481 info->rq_size = resp.actual_rq_size; 1482 info->first_sq_wq = legacy_mode ? 1 : resp.lsmm; 1483 info->qp_caps = resp.qp_caps; 1484 info->qp_id = resp.qp_id; 1485 iwuqp->irdma_drv_opt = resp.irdma_drv_opt; 1486 iwuqp->ibv_qp.qp_num = resp.qp_id; 1487 1488 iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq, 1489 verbs_cq.cq); 1490 iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq, 1491 verbs_cq.cq); 1492 iwuqp->send_cq->uqp = iwuqp; 1493 iwuqp->recv_cq->uqp = iwuqp; 1494 1495 return 0; 1496 err_qp: 1497 ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr); 1498 err_dereg_mr: 1499 printf("%s: failed to create QP, status %d\n", __func__, ret); 1500 irdma_free_hw_buf(info->sq, iwuqp->buf_size); 1501 return ret; 1502 } 1503 1504 /** 1505 * irdma_ucreate_qp - create qp on user app 1506 * @pd: pd for the qp 1507 * @attr: attributes of the qp to be created (sizes, sge, cq) 1508 */ 1509 struct ibv_qp * 1510 irdma_ucreate_qp(struct ibv_pd *pd, 1511 struct ibv_qp_init_attr *attr) 1512 { 1513 struct irdma_qp_uk_init_info info = {}; 1514 struct irdma_uk_attrs *uk_attrs; 1515 struct irdma_uvcontext *iwvctx; 1516 struct irdma_uqp *iwuqp; 1517 int status; 1518 1519 if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) { 1520 printf("%s: failed to create QP, unsupported QP type: 0x%x\n", 1521 __func__, attr->qp_type); 1522 errno = EOPNOTSUPP; 1523 return NULL; 1524 } 1525 1526 iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx); 1527 uk_attrs = &iwvctx->uk_attrs; 1528 1529 if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || 1530 attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || 1531 attr->cap.max_inline_data > uk_attrs->max_hw_inline) { 1532 errno = EINVAL; 1533 return NULL; 1534 } 1535 1536 info.uk_attrs = uk_attrs; 1537 info.sq_size = attr->cap.max_send_wr; 1538 info.rq_size = attr->cap.max_recv_wr; 1539 info.max_sq_frag_cnt = attr->cap.max_send_sge; 1540 info.max_rq_frag_cnt = attr->cap.max_recv_sge; 1541 info.max_inline_data = attr->cap.max_inline_data; 1542 info.abi_ver = iwvctx->abi_ver; 1543 1544 status = irdma_uk_calc_depth_shift_sq(&info, &info.sq_depth, &info.sq_shift); 1545 if (status) { 1546 printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n", 1547 __func__, attr->cap.max_send_wr, attr->cap.max_send_sge, 1548 attr->cap.max_inline_data); 1549 errno = status; 1550 return NULL; 1551 } 1552 1553 status = irdma_uk_calc_depth_shift_rq(&info, &info.rq_depth, &info.rq_shift); 1554 if (status) { 1555 printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n", 1556 __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge); 1557 errno = status; 1558 return NULL; 1559 } 1560 1561 iwuqp = memalign(1024, sizeof(*iwuqp)); 1562 if (!iwuqp) 1563 return NULL; 1564 1565 memset(iwuqp, 0, sizeof(*iwuqp)); 1566 1567 status = pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE); 1568 if (status) 1569 goto err_free_qp; 1570 1571 info.sq_size = info.sq_depth >> info.sq_shift; 1572 info.rq_size = info.rq_depth >> info.rq_shift; 1573 /** 1574 * Maintain backward compatibility with older ABI which pass sq 1575 * and rq depth (in quanta) in cap.max_send_wr a cap.max_recv_wr 1576 */ 1577 if (!iwvctx->use_raw_attrs) { 1578 attr->cap.max_send_wr = info.sq_size; 1579 attr->cap.max_recv_wr = info.rq_size; 1580 } 1581 1582 iwuqp->recv_sges = calloc(attr->cap.max_recv_sge, 
sizeof(*iwuqp->recv_sges)); 1583 if (!iwuqp->recv_sges) { 1584 status = errno; /* preserve errno */ 1585 goto err_destroy_lock; 1586 } 1587 1588 info.wqe_alloc_db = (u32 *)iwvctx->db; 1589 info.legacy_mode = iwvctx->legacy_mode; 1590 info.sq_wrtrk_array = calloc(info.sq_depth, sizeof(*info.sq_wrtrk_array)); 1591 if (!info.sq_wrtrk_array) { 1592 status = errno; /* preserve errno */ 1593 goto err_free_rsges; 1594 } 1595 1596 info.rq_wrid_array = calloc(info.rq_depth, sizeof(*info.rq_wrid_array)); 1597 if (!info.rq_wrid_array) { 1598 status = errno; /* preserve errno */ 1599 goto err_free_sq_wrtrk; 1600 } 1601 1602 iwuqp->sq_sig_all = attr->sq_sig_all; 1603 iwuqp->qp_type = attr->qp_type; 1604 status = irdma_vmapped_qp(iwuqp, pd, attr, &info, iwvctx->legacy_mode); 1605 if (status) 1606 goto err_free_rq_wrid; 1607 1608 iwuqp->qp.back_qp = iwuqp; 1609 iwuqp->qp.lock = &iwuqp->lock; 1610 1611 status = irdma_uk_qp_init(&iwuqp->qp, &info); 1612 if (status) 1613 goto err_free_vmap_qp; 1614 1615 attr->cap.max_send_wr = (info.sq_depth - IRDMA_SQ_RSVD) >> info.sq_shift; 1616 attr->cap.max_recv_wr = (info.rq_depth - IRDMA_RQ_RSVD) >> info.rq_shift; 1617 1618 return &iwuqp->ibv_qp; 1619 1620 err_free_vmap_qp: 1621 irdma_destroy_vmapped_qp(iwuqp); 1622 irdma_free_hw_buf(info.sq, iwuqp->buf_size); 1623 err_free_rq_wrid: 1624 free(info.rq_wrid_array); 1625 err_free_sq_wrtrk: 1626 free(info.sq_wrtrk_array); 1627 err_free_rsges: 1628 free(iwuqp->recv_sges); 1629 err_destroy_lock: 1630 pthread_spin_destroy(&iwuqp->lock); 1631 err_free_qp: 1632 printf("%s: failed to create QP\n", __func__); 1633 free(iwuqp); 1634 1635 errno = status; 1636 return NULL; 1637 } 1638 1639 /** 1640 * irdma_uquery_qp - query qp for some attribute 1641 * @qp: qp for the attributes query 1642 * @attr: to return the attributes 1643 * @attr_mask: mask of what is query for 1644 * @init_attr: initial attributes during create_qp 1645 */ 1646 int 1647 irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, 1648 struct ibv_qp_init_attr *init_attr) 1649 { 1650 struct ibv_query_qp cmd; 1651 1652 return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, 1653 sizeof(cmd)); 1654 } 1655 1656 /** 1657 * irdma_umodify_qp - send qp modify to driver 1658 * @qp: qp to modify 1659 * @attr: attribute to modify 1660 * @attr_mask: mask of the attribute 1661 */ 1662 int 1663 irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) 1664 { 1665 struct irdma_umodify_qp_resp resp = {}; 1666 struct ibv_modify_qp cmd = {}; 1667 struct irdma_modify_qp_cmd cmd_ex = {}; 1668 struct irdma_uvcontext *iwvctx; 1669 struct irdma_uqp *iwuqp; 1670 1671 iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); 1672 iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx); 1673 1674 if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE && 1675 iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) { 1676 u64 offset; 1677 void *map; 1678 int ret; 1679 1680 ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, 1681 sizeof(cmd_ex.ibv_cmd), 1682 sizeof(cmd_ex), &resp.ibv_resp, 1683 sizeof(resp.ibv_resp), 1684 sizeof(resp)); 1685 if (!ret) 1686 iwuqp->qp.rd_fence_rate = resp.rd_fence_rate; 1687 if (ret || !resp.push_valid) 1688 return ret; 1689 1690 if (iwuqp->qp.push_wqe) 1691 return ret; 1692 1693 offset = resp.push_wqe_mmap_key; 1694 map = irdma_mmap(qp->context->cmd_fd, offset); 1695 if (map == MAP_FAILED) 1696 return ret; 1697 1698 iwuqp->qp.push_wqe = map; 1699 1700 offset = resp.push_db_mmap_key; 1701 map = 
irdma_mmap(qp->context->cmd_fd, offset); 1702 if (map == MAP_FAILED) { 1703 irdma_munmap(iwuqp->qp.push_wqe); 1704 iwuqp->qp.push_wqe = NULL; 1705 printf("failed to map push page, errno %d\n", errno); 1706 return ret; 1707 } 1708 iwuqp->qp.push_wqe += resp.push_offset; 1709 iwuqp->qp.push_db = map + resp.push_offset; 1710 1711 return ret; 1712 } else { 1713 int ret; 1714 1715 ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); 1716 if (ret) 1717 return ret; 1718 if (attr_mask & IBV_QP_STATE && attr->qp_state == IBV_QPS_ERR) 1719 pthread_create(&iwuqp->flush_thread, NULL, irdma_flush_thread, iwuqp); 1720 return 0; 1721 } 1722 } 1723 1724 static void 1725 irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) 1726 { 1727 struct irdma_umodify_qp_resp resp = {}; 1728 struct irdma_modify_qp_cmd cmd_ex = {}; 1729 struct ibv_qp_attr attr = {}; 1730 1731 attr.qp_state = IBV_QPS_ERR; 1732 cmd_ex.sq_flush = sq_flush; 1733 cmd_ex.rq_flush = rq_flush; 1734 1735 ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, 1736 &cmd_ex.ibv_cmd, 1737 sizeof(cmd_ex.ibv_cmd), 1738 sizeof(cmd_ex), &resp.ibv_resp, 1739 sizeof(resp.ibv_resp), 1740 sizeof(resp)); 1741 } 1742 1743 /** 1744 * irdma_clean_cqes - clean cq entries for qp 1745 * @qp: qp for which completions are cleaned 1746 * @iwcq: cq to be cleaned 1747 */ 1748 static void 1749 irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq) 1750 { 1751 struct irdma_cq_uk *ukcq = &iwucq->cq; 1752 int ret; 1753 1754 ret = pthread_spin_lock(&iwucq->lock); 1755 if (ret) 1756 return; 1757 1758 irdma_uk_clean_cq(qp, ukcq); 1759 pthread_spin_unlock(&iwucq->lock); 1760 } 1761 1762 /** 1763 * irdma_udestroy_qp - destroy qp 1764 * @qp: qp to destroy 1765 */ 1766 int 1767 irdma_udestroy_qp(struct ibv_qp *qp) 1768 { 1769 struct irdma_uqp *iwuqp; 1770 int ret; 1771 1772 iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); 1773 if (iwuqp->flush_thread) { 1774 pthread_cancel(iwuqp->flush_thread); 1775 pthread_join(iwuqp->flush_thread, NULL); 1776 } 1777 ret = pthread_spin_destroy(&iwuqp->lock); 1778 if (ret) 1779 goto err; 1780 1781 ret = irdma_destroy_vmapped_qp(iwuqp); 1782 if (ret) 1783 goto err; 1784 1785 /* Clean any pending completions from the cq(s) */ 1786 if (iwuqp->send_cq) 1787 irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq); 1788 1789 if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) 1790 irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); 1791 1792 if (iwuqp->qp.sq_wrtrk_array) 1793 free(iwuqp->qp.sq_wrtrk_array); 1794 if (iwuqp->qp.rq_wrid_array) 1795 free(iwuqp->qp.rq_wrid_array); 1796 1797 irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size); 1798 free(iwuqp->recv_sges); 1799 free(iwuqp); 1800 return 0; 1801 1802 err: 1803 printf("%s: failed to destroy QP, status %d\n", 1804 __func__, ret); 1805 return ret; 1806 } 1807 1808 /** 1809 * irdma_copy_sg_list - copy sg list for qp 1810 * @sg_list: copied into sg_list 1811 * @sgl: copy from sgl 1812 * @num_sges: count of sg entries 1813 * @max_sges: count of max supported sg entries 1814 */ 1815 static void 1816 irdma_copy_sg_list(struct irdma_sge *sg_list, struct ibv_sge *sgl, 1817 int num_sges) 1818 { 1819 int i; 1820 1821 for (i = 0; i < num_sges; i++) { 1822 sg_list[i].tag_off = sgl[i].addr; 1823 sg_list[i].len = sgl[i].length; 1824 sg_list[i].stag = sgl[i].lkey; 1825 } 1826 } 1827 1828 /** 1829 * calc_type2_mw_stag - calculate type 2 MW stag 1830 * @rkey: desired rkey of the MW 1831 * @mw_rkey: type2 memory window rkey 1832 * 1833 * compute type2 memory window stag by taking lower 8 bits 
1834 * of the desired rkey and leaving 24 bits if mw->rkey unchanged 1835 */ 1836 static inline u32 calc_type2_mw_stag(u32 rkey, u32 mw_rkey) { 1837 const u32 mask = 0xff; 1838 1839 return (rkey & mask) | (mw_rkey & ~mask); 1840 } 1841 1842 /** 1843 * irdma_post_send - post send wr for user application 1844 * @ib_qp: qp to post wr 1845 * @ib_wr: work request ptr 1846 * @bad_wr: return of bad wr if err 1847 */ 1848 int 1849 irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, 1850 struct ibv_send_wr **bad_wr) 1851 { 1852 struct irdma_post_sq_info info; 1853 struct irdma_uvcontext *iwvctx; 1854 struct irdma_uk_attrs *uk_attrs; 1855 struct irdma_uqp *iwuqp; 1856 bool reflush = false; 1857 int err = 0; 1858 1859 iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); 1860 iwvctx = container_of(ib_qp->context, struct irdma_uvcontext, ibv_ctx); 1861 uk_attrs = &iwvctx->uk_attrs; 1862 1863 err = pthread_spin_lock(&iwuqp->lock); 1864 if (err) 1865 return err; 1866 1867 if (!IRDMA_RING_MORE_WORK(iwuqp->qp.sq_ring) && 1868 ib_qp->state == IBV_QPS_ERR) 1869 reflush = true; 1870 1871 while (ib_wr) { 1872 memset(&info, 0, sizeof(info)); 1873 info.wr_id = (u64)(ib_wr->wr_id); 1874 if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || 1875 iwuqp->sq_sig_all) 1876 info.signaled = true; 1877 if (ib_wr->send_flags & IBV_SEND_FENCE) 1878 info.read_fence = true; 1879 1880 switch (ib_wr->opcode) { 1881 case IBV_WR_SEND_WITH_IMM: 1882 if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) { 1883 info.imm_data_valid = true; 1884 info.imm_data = ntohl(ib_wr->imm_data); 1885 } else { 1886 err = EINVAL; 1887 break; 1888 } 1889 /* fallthrough */ 1890 case IBV_WR_SEND: 1891 case IBV_WR_SEND_WITH_INV: 1892 if (ib_wr->opcode == IBV_WR_SEND || 1893 ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { 1894 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1895 info.op_type = IRDMA_OP_TYPE_SEND_SOL; 1896 else 1897 info.op_type = IRDMA_OP_TYPE_SEND; 1898 } else { 1899 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1900 info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV; 1901 else 1902 info.op_type = IRDMA_OP_TYPE_SEND_INV; 1903 info.stag_to_inv = ib_wr->imm_data; 1904 } 1905 info.op.send.num_sges = ib_wr->num_sge; 1906 info.op.send.sg_list = (struct irdma_sge *)ib_wr->sg_list; 1907 if (ib_qp->qp_type == IBV_QPT_UD) { 1908 struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah, 1909 struct irdma_uah, ibv_ah); 1910 1911 info.op.send.ah_id = ah->ah_id; 1912 info.op.send.qkey = ib_wr->wr.ud.remote_qkey; 1913 info.op.send.dest_qp = ib_wr->wr.ud.remote_qpn; 1914 } 1915 1916 if (ib_wr->send_flags & IBV_SEND_INLINE) 1917 err = irdma_uk_inline_send(&iwuqp->qp, &info, false); 1918 else 1919 err = irdma_uk_send(&iwuqp->qp, &info, false); 1920 break; 1921 case IBV_WR_RDMA_WRITE_WITH_IMM: 1922 if (iwuqp->qp.qp_caps & IRDMA_WRITE_WITH_IMM) { 1923 info.imm_data_valid = true; 1924 info.imm_data = ntohl(ib_wr->imm_data); 1925 } else { 1926 err = EINVAL; 1927 break; 1928 } 1929 /* fallthrough */ 1930 case IBV_WR_RDMA_WRITE: 1931 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1932 info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL; 1933 else 1934 info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; 1935 1936 info.op.rdma_write.num_lo_sges = ib_wr->num_sge; 1937 info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; 1938 info.op.rdma_write.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr; 1939 info.op.rdma_write.rem_addr.stag = ib_wr->wr.rdma.rkey; 1940 if (ib_wr->send_flags & IBV_SEND_INLINE) 1941 err = irdma_uk_inline_rdma_write(&iwuqp->qp, &info, false); 1942 else 1943 err = 
irdma_uk_rdma_write(&iwuqp->qp, &info, false); 1944 break; 1945 case IBV_WR_RDMA_READ: 1946 if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) { 1947 err = EINVAL; 1948 break; 1949 } 1950 info.op_type = IRDMA_OP_TYPE_RDMA_READ; 1951 info.op.rdma_read.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr; 1952 info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey; 1953 1954 info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; 1955 info.op.rdma_read.num_lo_sges = ib_wr->num_sge; 1956 err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false); 1957 break; 1958 case IBV_WR_BIND_MW: 1959 if (ib_qp->qp_type != IBV_QPT_RC) { 1960 err = EINVAL; 1961 break; 1962 } 1963 info.op_type = IRDMA_OP_TYPE_BIND_MW; 1964 info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey; 1965 if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { 1966 info.op.bind_window.mem_window_type_1 = true; 1967 info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey; 1968 } else { 1969 struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); 1970 1971 if (vmr->access & IBV_ACCESS_ZERO_BASED) { 1972 err = EINVAL; 1973 break; 1974 } 1975 info.op.bind_window.mw_stag = 1976 calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey); 1977 ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag; 1978 1979 } 1980 1981 if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) { 1982 info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED; 1983 info.op.bind_window.va = NULL; 1984 } else { 1985 info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED; 1986 info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr; 1987 } 1988 info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length; 1989 info.op.bind_window.ena_reads = 1990 (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0; 1991 info.op.bind_window.ena_writes = 1992 (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 

/**
 * irdma_upost_recv - post receive wr for user application
 * @ib_qp: qp to post the receive wr on
 * @ib_wr: work request for receive
 * @bad_wr: bad wr that caused an error
 */
int
irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr,
                 struct ibv_recv_wr **bad_wr)
{
    struct irdma_post_rq_info post_recv = {};
    struct irdma_sge *sg_list;
    struct irdma_uqp *iwuqp;
    bool reflush = false;
    int err = 0;

    iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp);
    sg_list = iwuqp->recv_sges;

    err = pthread_spin_lock(&iwuqp->lock);
    if (err)
        return err;

    if (!IRDMA_RING_MORE_WORK(iwuqp->qp.rq_ring) &&
        ib_qp->state == IBV_QPS_ERR)
        reflush = true;

    while (ib_wr) {
        if (ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt) {
            *bad_wr = ib_wr;
            err = EINVAL;
            goto error;
        }
        post_recv.num_sges = ib_wr->num_sge;
        post_recv.wr_id = ib_wr->wr_id;
        irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
        post_recv.sg_list = sg_list;
        err = irdma_uk_post_receive(&iwuqp->qp, &post_recv);
        if (err) {
            *bad_wr = ib_wr;
            goto error;
        }

        if (reflush)
            irdma_issue_flush(ib_qp, 0, 1);

        ib_wr = ib_wr->next;
    }
error:
    pthread_spin_unlock(&iwuqp->lock);

    return err;
}

/**
 * irdma_ucreate_ah - create address handle associated with a pd
 * @ibpd: pd for the address handle
 * @attr: attributes of address handle
 */
struct ibv_ah *
irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr)
{
    struct irdma_uah *ah;
    union ibv_gid sgid;
    struct irdma_ucreate_ah_resp resp = {};
    int err;

    if (ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index,
                      &sgid)) {
        fprintf(stderr, "irdma: Error from ibv_query_gid.\n");
        errno = ENOENT;
        return NULL;
    }

    ah = calloc(1, sizeof(*ah));
    if (!ah)
        return NULL;

    err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp,
                            sizeof(resp));
    if (err) {
        free(ah);
        errno = err;
        return NULL;
    }

    ah->ah_id = resp.ah_id;

    return &ah->ibv_ah;
}

/**
 * irdma_udestroy_ah - destroy the address handle
 * @ibah: address handle
 */
int
irdma_udestroy_ah(struct ibv_ah *ibah)
{
    struct irdma_uah *ah;
    int ret;

    ah = container_of(ibah, struct irdma_uah, ibv_ah);

    ret = ibv_cmd_destroy_ah(ibah);
    if (ret)
        return ret;

    free(ah);

    return 0;
}
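
/*
 * Illustrative sketch only (not part of the provider, guarded out of the
 * build): creating and destroying an address handle through the generic
 * verbs, which land in irdma_ucreate_ah()/irdma_udestroy_ah(). The port
 * number, GID index and destination GID are placeholders supplied by the
 * caller.
 */
#if 0
static int
example_ah_lifecycle(struct ibv_pd *pd, uint8_t port_num,
                     const union ibv_gid *dgid)
{
    struct ibv_ah_attr attr = {
        .is_global = 1,
        .port_num = port_num,
        .grh = {
            .dgid = *dgid,
            .sgid_index = 0,    /* placeholder GID index */
            .hop_limit = 64,
        },
    };
    struct ibv_ah *ah;

    ah = ibv_create_ah(pd, &attr);      /* -> irdma_ucreate_ah() */
    if (!ah)
        return errno;

    return ibv_destroy_ah(ah);          /* -> irdma_udestroy_ah() */
}
#endif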

/**
 * irdma_uattach_mcast - Attach qp to multicast group
 * @qp: The queue pair
 * @gid: The Global ID for multicast group
 * @lid: The Local ID
 */
int
irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
                    uint16_t lid)
{
    return ibv_cmd_attach_mcast(qp, gid, lid);
}

/**
 * irdma_udetach_mcast - Detach qp from multicast group
 * @qp: The queue pair
 * @gid: The Global ID for multicast group
 * @lid: The Local ID
 */
int
irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
                    uint16_t lid)
{
    return ibv_cmd_detach_mcast(qp, gid, lid);
}

/**
 * irdma_uresize_cq - resizes a cq
 * @cq: cq to resize
 * @cqe: the number of cqes of the new cq
 */
int
irdma_uresize_cq(struct ibv_cq *cq, int cqe)
{
    struct irdma_uvcontext *iwvctx;
    struct irdma_uk_attrs *uk_attrs;
    struct irdma_uresize_cq cmd = {};
    struct ibv_resize_cq_resp resp = {};
    struct irdma_ureg_mr reg_mr_cmd = {};
    struct ibv_reg_mr_resp reg_mr_resp = {};
    struct irdma_cq_buf *cq_buf = NULL;
    struct irdma_cqe *cq_base = NULL;
    struct verbs_mr new_mr = {};
    struct irdma_ucq *iwucq;
    size_t cq_size;
    u32 cq_pages;
    int cqe_needed;
    int ret = 0;
    bool cqe_64byte_ena;

    iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
    iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
    uk_attrs = &iwvctx->uk_attrs;

    if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
        return EOPNOTSUPP;

    if (cqe < uk_attrs->min_hw_cq_size || cqe > uk_attrs->max_hw_cq_size - 1)
        return EINVAL;

    cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? true : false;

    cqe_needed = get_cq_size(cqe, uk_attrs->hw_rev, cqe_64byte_ena);

    if (cqe_needed == iwucq->cq.cq_size)
        return 0;

    cq_size = get_cq_total_bytes(cqe_needed, cqe_64byte_ena);
    cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT;
    cq_base = irdma_alloc_hw_buf(cq_size);
    if (!cq_base)
        return ENOMEM;

    memset(cq_base, 0, cq_size);

    cq_buf = malloc(sizeof(*cq_buf));
    if (!cq_buf) {
        ret = ENOMEM;
        goto err_buf;
    }

    new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
    reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
    reg_mr_cmd.cq_pages = cq_pages;

    ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
                         (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
                         &new_mr.ibv_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
                         &reg_mr_resp, sizeof(reg_mr_resp));
    if (ret)
        goto err_dereg_mr;

    ret = pthread_spin_lock(&iwucq->lock);
    if (ret)
        goto err_lock;

    cmd.user_cq_buffer = (__u64)((uintptr_t)cq_base);
    ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
                            sizeof(cmd), &resp, sizeof(resp));
    if (ret)
        goto err_resize;

    memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
    cq_buf->buf_size = cq_size;
    cq_buf->vmr = iwucq->vmr;
    iwucq->vmr = new_mr;
    irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
    iwucq->verbs_cq.cq.cqe = cqe;
    LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list);

    pthread_spin_unlock(&iwucq->lock);

    return ret;

err_resize:
    pthread_spin_unlock(&iwucq->lock);
err_lock:
    ibv_cmd_dereg_mr(&new_mr.ibv_mr);
err_dereg_mr:
    free(cq_buf);
err_buf:
    fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret);
    irdma_free_hw_buf(cq_base, cq_size);
    return ret;
}
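
/*
 * Illustrative sketch only (not part of the provider, guarded out of the
 * build): resizing a completion queue through the generic ibv_resize_cq()
 * verb, which dispatches to irdma_uresize_cq(). The requested depth is a
 * placeholder; the provider rounds it up to the hardware CQ size it needs.
 */
#if 0
static int
example_resize_cq(struct ibv_cq *cq)
{
    int new_depth = 1024;    /* placeholder requested CQE count */
    int ret;

    ret = ibv_resize_cq(cq, new_depth);     /* -> irdma_uresize_cq() */
    if (ret)
        fprintf(stderr, "resize_cq failed: %d\n", ret);

    return ret;
}
#endif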