1 /*- 2 * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB 3 * 4 * Copyright (C) 2019 - 2023 Intel Corporation 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenFabrics.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 35 #include <config.h> 36 #include <stdlib.h> 37 #include <stdio.h> 38 #include <string.h> 39 #include <unistd.h> 40 #include <signal.h> 41 #include <errno.h> 42 #include <sys/param.h> 43 #include <sys/mman.h> 44 #include <netinet/in.h> 45 #include <sys/stat.h> 46 #include <fcntl.h> 47 #include <stdbool.h> 48 #include <infiniband/opcode.h> 49 50 #include "irdma_umain.h" 51 #include "abi.h" 52 53 static inline void 54 print_fw_ver(uint64_t fw_ver, char *str, size_t len) 55 { 56 uint16_t major, minor; 57 58 major = fw_ver >> 32 & 0xffff; 59 minor = fw_ver & 0xffff; 60 61 snprintf(str, len, "%d.%d", major, minor); 62 } 63 64 /** 65 * irdma_uquery_device_ex - query device attributes including extended properties 66 * @context: user context for the device 67 * @input: extensible input struct for ibv_query_device_ex verb 68 * @attr: extended device attribute struct 69 * @attr_size: size of extended device attribute struct 70 **/ 71 int 72 irdma_uquery_device_ex(struct ibv_context *context, 73 const struct ibv_query_device_ex_input *input, 74 struct ibv_device_attr_ex *attr, size_t attr_size) 75 { 76 struct irdma_query_device_ex cmd = {}; 77 struct irdma_query_device_ex_resp resp = {}; 78 uint64_t fw_ver; 79 int ret; 80 81 ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver, 82 &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), 83 &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); 84 if (ret) 85 return ret; 86 87 print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver)); 88 89 return 0; 90 } 91 92 /** 93 * irdma_uquery_device - call driver to query device for max resources 94 * @context: user context for the device 95 * @attr: where to save all the mx resources from the driver 96 **/ 97 int 98 irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr) 99 { 100 struct ibv_query_device cmd; 101 uint64_t fw_ver; 102 int ret; 103 104 ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd)); 105 if (ret) 106 return ret; 107 108 print_fw_ver(fw_ver, attr->fw_ver, 
                      sizeof(attr->fw_ver));

        return 0;
}

/**
 * irdma_uquery_port - get port attributes (msg size, lnk, mtu...)
 * @context: user context of the device
 * @port: port for the attributes
 * @attr: to return port attributes
 **/
int
irdma_uquery_port(struct ibv_context *context, uint8_t port,
                  struct ibv_port_attr *attr)
{
        struct ibv_query_port cmd;

        return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
}

/**
 * irdma_ualloc_pd - allocates protection domain and return pd ptr
 * @context: user context of the device
 **/
struct ibv_pd *
irdma_ualloc_pd(struct ibv_context *context)
{
        struct ibv_alloc_pd cmd;
        struct irdma_ualloc_pd_resp resp = {};
        struct irdma_upd *iwupd;
        int err;

        iwupd = calloc(1, sizeof(*iwupd));
        if (!iwupd)
                return NULL;

        err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd),
                               &resp.ibv_resp, sizeof(resp));
        if (err)
                goto err_free;

        iwupd->pd_id = resp.pd_id;

        return &iwupd->ibv_pd;

err_free:
        free(iwupd);

        errno = err;
        return NULL;
}

/**
 * irdma_ufree_pd - free pd resources
 * @pd: pd to free resources
 */
int
irdma_ufree_pd(struct ibv_pd *pd)
{
        struct irdma_upd *iwupd;
        int ret;

        iwupd = container_of(pd, struct irdma_upd, ibv_pd);
        ret = ibv_cmd_dealloc_pd(pd);
        if (ret)
                return ret;

        free(iwupd);

        return 0;
}

/**
 * irdma_ureg_mr - register user memory region
 * @pd: pd for the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
struct ibv_mr *
irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length,
              int access)
{
        struct verbs_mr *vmr;
        struct irdma_ureg_mr cmd = {};
        struct ibv_reg_mr_resp resp;
        int err;

        vmr = malloc(sizeof(*vmr));
        if (!vmr)
                return NULL;

        cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
        err = ibv_cmd_reg_mr(pd, addr, length,
                             (uintptr_t)addr, access, &vmr->ibv_mr, &cmd.ibv_cmd,
                             sizeof(cmd), &resp, sizeof(resp));
        if (err) {
                free(vmr);
                errno = err;
                return NULL;
        }

        return &vmr->ibv_mr;
}

/**
 * irdma_urereg_mr - re-register memory region
 * @vmr: mr that was allocated
 * @flags: bit mask to indicate which of the attr's of MR modified
 * @pd: pd of the mr
 * @addr: user address of the memory region
 * @length: length of the memory
 * @access: access allowed on this mr
 */
int
irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd,
                void *addr, size_t length, int access)
{
        struct irdma_urereg_mr cmd = {};
        struct ibv_rereg_mr_resp resp;

        cmd.reg_type = IRDMA_MEMREG_TYPE_MEM;
        return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr,
                                access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp,
                                sizeof(resp));
}

/**
 * irdma_udereg_mr - deregister memory region
 * @mr: mr that was allocated
 */
int
irdma_udereg_mr(struct ibv_mr *mr)
{
        struct verbs_mr *vmr;
        int ret;

        vmr = container_of(mr, struct verbs_mr, ibv_mr);

        ret = ibv_cmd_dereg_mr(mr);
        if (ret)
                return ret;

        return 0;
}

/**
 * irdma_ualloc_mw - allocate memory window
 * @pd: protection domain
 * @type: memory window type
 */
struct ibv_mw *
irdma_ualloc_mw(struct ibv_pd *pd,
enum ibv_mw_type type) 258 { 259 struct ibv_mw *mw; 260 struct ibv_alloc_mw cmd; 261 struct ibv_alloc_mw_resp resp; 262 int err; 263 264 mw = calloc(1, sizeof(*mw)); 265 if (!mw) 266 return NULL; 267 268 err = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, 269 sizeof(resp)); 270 if (err) { 271 printf("%s: Failed to alloc memory window\n", 272 __func__); 273 free(mw); 274 errno = err; 275 return NULL; 276 } 277 278 return mw; 279 } 280 281 /** 282 * irdma_ubind_mw - bind a memory window 283 * @qp: qp to post WR 284 * @mw: memory window to bind 285 * @mw_bind: bind info 286 */ 287 int 288 irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, 289 struct ibv_mw_bind *mw_bind) 290 { 291 struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; 292 struct verbs_mr *vmr; 293 294 struct ibv_send_wr wr = {}; 295 struct ibv_send_wr *bad_wr; 296 int err; 297 298 if (!bind_info->mr && (bind_info->addr || bind_info->length)) 299 return EINVAL; 300 301 if (bind_info->mr) { 302 vmr = verbs_get_mr(bind_info->mr); 303 if (vmr->mr_type != IBV_MR_TYPE_MR) 304 return ENOTSUP; 305 306 if (vmr->access & IBV_ACCESS_ZERO_BASED) 307 return EINVAL; 308 309 if (mw->pd != bind_info->mr->pd) 310 return EPERM; 311 } 312 313 wr.opcode = IBV_WR_BIND_MW; 314 wr.bind_mw.bind_info = mw_bind->bind_info; 315 wr.bind_mw.mw = mw; 316 wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey); 317 318 wr.wr_id = mw_bind->wr_id; 319 wr.send_flags = mw_bind->send_flags; 320 321 err = irdma_upost_send(qp, &wr, &bad_wr); 322 if (!err) 323 mw->rkey = wr.bind_mw.rkey; 324 325 return err; 326 } 327 328 /** 329 * irdma_udealloc_mw - deallocate memory window 330 * @mw: memory window to dealloc 331 */ 332 int 333 irdma_udealloc_mw(struct ibv_mw *mw) 334 { 335 int ret; 336 struct ibv_dealloc_mw cmd; 337 338 ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd)); 339 if (ret) 340 return ret; 341 free(mw); 342 343 return 0; 344 } 345 346 static void * 347 irdma_alloc_hw_buf(size_t size) 348 { 349 void *buf; 350 351 buf = memalign(IRDMA_HW_PAGE_SIZE, size); 352 353 if (!buf) 354 return NULL; 355 if (ibv_dontfork_range(buf, size)) { 356 free(buf); 357 return NULL; 358 } 359 360 return buf; 361 } 362 363 static void 364 irdma_free_hw_buf(void *buf, size_t size) 365 { 366 ibv_dofork_range(buf, size); 367 free(buf); 368 } 369 370 /** 371 * get_cq_size - returns actual cqe needed by HW 372 * @ncqe: minimum cqes requested by application 373 * @hw_rev: HW generation 374 * @cqe_64byte_ena: enable 64byte cqe 375 */ 376 static inline int 377 get_cq_size(int ncqe, u8 hw_rev) 378 { 379 ncqe++; 380 381 /* Completions with immediate require 1 extra entry */ 382 if (hw_rev > IRDMA_GEN_1) 383 ncqe *= 2; 384 385 if (ncqe < IRDMA_U_MINCQ_SIZE) 386 ncqe = IRDMA_U_MINCQ_SIZE; 387 388 return ncqe; 389 } 390 391 static inline size_t get_cq_total_bytes(u32 cq_size) { 392 return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE); 393 } 394 395 /** 396 * ucreate_cq - irdma util function to create a CQ 397 * @context: ibv context 398 * @attr_ex: CQ init attributes 399 * @ext_cq: flag to create an extendable or normal CQ 400 */ 401 static struct ibv_cq_ex * 402 ucreate_cq(struct ibv_context *context, 403 struct ibv_cq_init_attr_ex *attr_ex, 404 bool ext_cq) 405 { 406 struct irdma_cq_uk_init_info info = {}; 407 struct irdma_ureg_mr reg_mr_cmd = {}; 408 struct irdma_ucreate_cq_ex cmd = {}; 409 struct irdma_ucreate_cq_ex_resp resp = {}; 410 struct ibv_reg_mr_resp reg_mr_resp = {}; 411 struct irdma_ureg_mr reg_mr_shadow_cmd = {}; 412 struct ibv_reg_mr_resp reg_mr_shadow_resp 
= {};
        struct irdma_uk_attrs *uk_attrs;
        struct irdma_uvcontext *iwvctx;
        struct irdma_ucq *iwucq;
        size_t total_size;
        u32 cq_pages;
        int ret, ncqe;
        u8 hw_rev;

        iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx);
        uk_attrs = &iwvctx->uk_attrs;
        hw_rev = uk_attrs->hw_rev;

        if (ext_cq) {
                u32 supported_flags = IRDMA_STANDARD_WC_FLAGS_EX;

                if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) {
                        errno = EOPNOTSUPP;
                        return NULL;
                }
        }

        if (attr_ex->cqe < uk_attrs->min_hw_cq_size || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) {
                errno = EINVAL;
                return NULL;
        }

        /* save the cqe requested by application */
        ncqe = attr_ex->cqe;

        iwucq = calloc(1, sizeof(*iwucq));
        if (!iwucq)
                return NULL;

        ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE);
        if (ret) {
                free(iwucq);
                errno = ret;
                return NULL;
        }

        info.cq_size = get_cq_size(attr_ex->cqe, hw_rev);
        total_size = get_cq_total_bytes(info.cq_size);
        iwucq->comp_vector = attr_ex->comp_vector;
        LIST_INIT(&iwucq->resize_list);
        cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT;

        if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
                total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE;

        iwucq->buf_size = total_size;
        info.cq_base = irdma_alloc_hw_buf(total_size);
        if (!info.cq_base) {
                ret = ENOMEM;
                goto err_cq_base;
        }

        memset(info.cq_base, 0, total_size);
        reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
        reg_mr_cmd.cq_pages = cq_pages;

        ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base,
                             total_size, (uintptr_t)info.cq_base,
                             IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr,
                             &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
                             &reg_mr_resp, sizeof(reg_mr_resp));
        if (ret)
                goto err_dereg_mr;

        iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

        if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
                info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE);
                if (!info.shadow_area) {
                        ret = ENOMEM;
                        goto err_alloc_shadow;
                }

                memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE);
                reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
                reg_mr_shadow_cmd.cq_pages = 1;

                ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area,
                                     IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
                                     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr,
                                     &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
                                     &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
                if (ret) {
                        irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
                        goto err_alloc_shadow;
                }

                iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

        } else {
                info.shadow_area = (__le64 *) ((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT));
        }

        attr_ex->cqe = info.cq_size;
        cmd.user_cq_buf = (__u64) ((uintptr_t)info.cq_base);
        cmd.user_shadow_area = (__u64) ((uintptr_t)info.shadow_area);

        ret = ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex,
                                   &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp,
                                   sizeof(resp.ibv_resp), sizeof(resp));
        attr_ex->cqe = ncqe;
        if (ret)
                goto err_create_cq;

        if (ext_cq)
                irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex);
        info.cq_id = resp.cq_id;
        /* Do not report the CQE's reserved for immediate and burned by HW */
        iwucq->verbs_cq.cq.cqe = ncqe;
        info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET);
        irdma_uk_cq_init(&iwucq->cq, &info);
        return &iwucq->verbs_cq.cq_ex;

err_create_cq:
        if (iwucq->vmr_shadow_area.ibv_mr.handle) {
                ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
                irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
        }
err_alloc_shadow:
        ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
err_dereg_mr:
        irdma_free_hw_buf(info.cq_base, total_size);
err_cq_base:
        printf("%s: failed to initialize CQ\n", __func__);
        pthread_spin_destroy(&iwucq->lock);

        free(iwucq);

        errno = ret;
        return NULL;
}

struct ibv_cq *
irdma_ucreate_cq(struct ibv_context *context, int cqe,
                 struct ibv_comp_channel *channel,
                 int comp_vector)
{
        struct ibv_cq_init_attr_ex attr_ex = {
                .cqe = cqe,
                .channel = channel,
                .comp_vector = comp_vector,
        };
        struct ibv_cq_ex *ibvcq_ex;

        ibvcq_ex = ucreate_cq(context, &attr_ex, false);

        return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL;
}

struct ibv_cq_ex *
irdma_ucreate_cq_ex(struct ibv_context *context,
                    struct ibv_cq_init_attr_ex *attr_ex)
{
        return ucreate_cq(context, attr_ex, true);
}
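
/*
 * Illustrative usage sketch (not part of the provider, kept as a comment):
 * the CQ create paths above are reached through the generic verbs API.  The
 * snippet below shows one plausible call sequence; "ctx" is assumed to be an
 * ibv_context opened on an irdma device, and the wc_flags chosen here are
 * among those handled by irdma_ibvcq_ex_fill_priv_funcs().
 *
 *      struct ibv_cq_init_attr_ex attr_ex = {
 *              .cqe = 256,
 *              .comp_vector = 0,
 *              .wc_flags = IBV_WC_EX_WITH_BYTE_LEN | IBV_WC_EX_WITH_IMM,
 *      };
 *      struct ibv_cq_ex *cq_ex;
 *      struct ibv_cq *cq;
 *
 *      cq_ex = ibv_create_cq_ex(ctx, &attr_ex);        // -> irdma_ucreate_cq_ex()
 *      if (!cq_ex)
 *              fprintf(stderr, "create_cq_ex failed: %d\n", errno);
 *
 *      cq = ibv_create_cq(ctx, 256, NULL, NULL, 0);    // -> irdma_ucreate_cq()
 */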

/**
 * irdma_free_cq_buf - free memory for cq buffer
 * @cq_buf: cq buf to free
 */
static void
irdma_free_cq_buf(struct irdma_cq_buf *cq_buf)
{
        ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr);
        irdma_free_hw_buf(cq_buf->cq.cq_base, get_cq_total_bytes(cq_buf->cq.cq_size));
        free(cq_buf);
}

/**
 * irdma_process_resize_list - process the cq list to remove buffers
 * @iwucq: cq which owns the list
 * @lcqe_buf: cq buf where the last cqe is found
 */
static int
irdma_process_resize_list(struct irdma_ucq *iwucq,
                          struct irdma_cq_buf *lcqe_buf)
{
        struct irdma_cq_buf *cq_buf, *next;
        int cq_cnt = 0;

        LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) {
                if (cq_buf == lcqe_buf)
                        return cq_cnt;

                LIST_REMOVE(cq_buf, list);
                irdma_free_cq_buf(cq_buf);
                cq_cnt++;
        }

        return cq_cnt;
}

/**
 * irdma_udestroy_cq - destroys cq
 * @cq: ptr to cq to be destroyed
 */
int
irdma_udestroy_cq(struct ibv_cq *cq)
{
        struct irdma_uk_attrs *uk_attrs;
        struct irdma_uvcontext *iwvctx;
        struct irdma_ucq *iwucq;
        int ret;

        iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
        iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
        uk_attrs = &iwvctx->uk_attrs;

        ret = pthread_spin_destroy(&iwucq->lock);
        if (ret)
                goto err;

        irdma_process_resize_list(iwucq, NULL);
        ret = ibv_cmd_destroy_cq(cq);
        if (ret)
                goto err;

        ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr);
        irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size);

        if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
                ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr);
                irdma_free_hw_buf(iwucq->cq.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
        }
        free(iwucq);
        return 0;

err:
        return ret;
}

static enum ibv_wc_status
irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode)
{
        switch (opcode) {
        case FLUSH_PROT_ERR:
                return IBV_WC_LOC_PROT_ERR;
        case FLUSH_REM_ACCESS_ERR:
                return IBV_WC_REM_ACCESS_ERR;
        case FLUSH_LOC_QP_OP_ERR:
                return IBV_WC_LOC_QP_OP_ERR;
        case FLUSH_REM_OP_ERR:
                return IBV_WC_REM_OP_ERR;
        case
FLUSH_LOC_LEN_ERR: 661 return IBV_WC_LOC_LEN_ERR; 662 case FLUSH_GENERAL_ERR: 663 return IBV_WC_WR_FLUSH_ERR; 664 case FLUSH_MW_BIND_ERR: 665 return IBV_WC_MW_BIND_ERR; 666 case FLUSH_REM_INV_REQ_ERR: 667 return IBV_WC_REM_INV_REQ_ERR; 668 case FLUSH_RETRY_EXC_ERR: 669 return IBV_WC_RETRY_EXC_ERR; 670 case FLUSH_FATAL_ERR: 671 default: 672 return IBV_WC_FATAL_ERR; 673 } 674 } 675 676 static inline void 677 set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry) 678 { 679 switch (cur_cqe->op_type) { 680 case IRDMA_OP_TYPE_RDMA_WRITE: 681 case IRDMA_OP_TYPE_RDMA_WRITE_SOL: 682 entry->opcode = IBV_WC_RDMA_WRITE; 683 break; 684 case IRDMA_OP_TYPE_RDMA_READ: 685 entry->opcode = IBV_WC_RDMA_READ; 686 break; 687 case IRDMA_OP_TYPE_SEND_SOL: 688 case IRDMA_OP_TYPE_SEND_SOL_INV: 689 case IRDMA_OP_TYPE_SEND_INV: 690 case IRDMA_OP_TYPE_SEND: 691 entry->opcode = IBV_WC_SEND; 692 break; 693 case IRDMA_OP_TYPE_BIND_MW: 694 entry->opcode = IBV_WC_BIND_MW; 695 break; 696 case IRDMA_OP_TYPE_INV_STAG: 697 entry->opcode = IBV_WC_LOCAL_INV; 698 break; 699 default: 700 entry->status = IBV_WC_GENERAL_ERR; 701 printf("%s: Invalid opcode = %d in CQE\n", 702 __func__, cur_cqe->op_type); 703 } 704 } 705 706 static inline void 707 set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe, 708 struct ibv_wc *entry, bool send_imm_support) 709 { 710 if (!send_imm_support) { 711 entry->opcode = cur_cqe->imm_valid ? IBV_WC_RECV_RDMA_WITH_IMM : 712 IBV_WC_RECV; 713 return; 714 } 715 switch (cur_cqe->op_type) { 716 case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: 717 case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: 718 entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; 719 break; 720 default: 721 entry->opcode = IBV_WC_RECV; 722 } 723 } 724 725 /** 726 * irdma_process_cqe_ext - process current cqe for extended CQ 727 * @cur_cqe - current cqe info 728 */ 729 static void 730 irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe) 731 { 732 struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe); 733 struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; 734 735 ibvcq_ex->wr_id = cur_cqe->wr_id; 736 if (cur_cqe->error) 737 ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? 738 irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; 739 else 740 ibvcq_ex->status = IBV_WC_SUCCESS; 741 } 742 743 /** 744 * irdma_process_cqe - process current cqe info 745 * @entry - ibv_wc object to fill in for non-extended CQ 746 * @cur_cqe - current cqe info 747 */ 748 static void 749 irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe) 750 { 751 struct irdma_qp_uk *qp; 752 struct ibv_qp *ib_qp; 753 754 entry->wc_flags = 0; 755 entry->wr_id = cur_cqe->wr_id; 756 entry->qp_num = cur_cqe->qp_id; 757 qp = cur_cqe->qp_handle; 758 ib_qp = qp->back_qp; 759 760 if (cur_cqe->error) { 761 entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? 762 irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; 763 entry->vendor_err = cur_cqe->major_err << 16 | 764 cur_cqe->minor_err; 765 } else { 766 entry->status = IBV_WC_SUCCESS; 767 } 768 769 if (cur_cqe->imm_valid) { 770 entry->imm_data = htonl(cur_cqe->imm_data); 771 entry->wc_flags |= IBV_WC_WITH_IMM; 772 } 773 774 if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) { 775 set_ib_wc_op_sq(cur_cqe, entry); 776 } else { 777 set_ib_wc_op_rq(cur_cqe, entry, 778 qp->qp_caps & IRDMA_SEND_WITH_IMM ? 
779 true : false); 780 if (ib_qp->qp_type != IBV_QPT_UD && 781 cur_cqe->stag_invalid_set) { 782 entry->invalidated_rkey = cur_cqe->inv_stag; 783 entry->wc_flags |= IBV_WC_WITH_INV; 784 } 785 } 786 787 if (ib_qp->qp_type == IBV_QPT_UD) { 788 entry->src_qp = cur_cqe->ud_src_qpn; 789 entry->wc_flags |= IBV_WC_GRH; 790 } else { 791 entry->src_qp = cur_cqe->qp_id; 792 } 793 entry->byte_len = cur_cqe->bytes_xfered; 794 } 795 796 /** 797 * irdma_poll_one - poll one entry of the CQ 798 * @ukcq: ukcq to poll 799 * @cur_cqe: current CQE info to be filled in 800 * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ 801 * 802 * Returns the internal irdma device error code or 0 on success 803 */ 804 static int 805 irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe, 806 struct ibv_wc *entry) 807 { 808 int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe); 809 810 if (ret) 811 return ret; 812 813 if (!entry) 814 irdma_process_cqe_ext(cur_cqe); 815 else 816 irdma_process_cqe(entry, cur_cqe); 817 818 return 0; 819 } 820 821 /** 822 * __irdma_upoll_cq - irdma util function to poll device CQ 823 * @iwucq: irdma cq to poll 824 * @num_entries: max cq entries to poll 825 * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ 826 * 827 * Returns non-negative value equal to the number of completions 828 * found. On failure, EINVAL 829 */ 830 static int 831 __irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries, 832 struct ibv_wc *entry) 833 { 834 struct irdma_cq_buf *cq_buf, *next; 835 struct irdma_cq_buf *last_buf = NULL; 836 struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe; 837 bool cq_new_cqe = false; 838 int resized_bufs = 0; 839 int npolled = 0; 840 int ret; 841 842 /* go through the list of previously resized CQ buffers */ 843 LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { 844 while (npolled < num_entries) { 845 ret = irdma_poll_one(&cq_buf->cq, cur_cqe, 846 entry ? entry + npolled : NULL); 847 if (!ret) { 848 ++npolled; 849 cq_new_cqe = true; 850 continue; 851 } 852 if (ret == ENOENT) 853 break; 854 /* QP using the CQ is destroyed. Skip reporting this CQE */ 855 if (ret == EFAULT) { 856 cq_new_cqe = true; 857 continue; 858 } 859 goto error; 860 } 861 862 /* save the resized CQ buffer which received the last cqe */ 863 if (cq_new_cqe) 864 last_buf = cq_buf; 865 cq_new_cqe = false; 866 } 867 868 /* check the current CQ for new cqes */ 869 while (npolled < num_entries) { 870 ret = irdma_poll_one(&iwucq->cq, cur_cqe, 871 entry ? entry + npolled : NULL); 872 if (!ret) { 873 ++npolled; 874 cq_new_cqe = true; 875 continue; 876 } 877 if (ret == ENOENT) 878 break; 879 /* QP using the CQ is destroyed. 
                   Skip reporting this CQE */
                if (ret == EFAULT) {
                        cq_new_cqe = true;
                        continue;
                }
                goto error;
        }

        if (cq_new_cqe)
                /* all previous CQ resizes are complete */
                resized_bufs = irdma_process_resize_list(iwucq, NULL);
        else if (last_buf)
                /* only CQ resizes up to the last_buf are complete */
                resized_bufs = irdma_process_resize_list(iwucq, last_buf);
        if (resized_bufs)
                /* report to the HW the number of complete CQ resizes */
                irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs);

        return npolled;

error:
        printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret);

        return EINVAL;
}

/**
 * irdma_upoll_cq - verb API callback to poll device CQ
 * @cq: ibv_cq to poll
 * @num_entries: max cq entries to poll
 * @entry: pointer to array of ibv_wc objects to be filled in for each completion
 *
 * Returns non-negative value equal to the number of completions
 * found and a negative error code on failure
 */
int
irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry)
{
        struct irdma_ucq *iwucq;
        int ret;

        iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
        ret = pthread_spin_lock(&iwucq->lock);
        if (ret)
                return -ret;

        ret = __irdma_upoll_cq(iwucq, num_entries, entry);

        pthread_spin_unlock(&iwucq->lock);

        return ret;
}

/**
 * irdma_start_poll - verb_ex API callback to poll batch of WC's
 * @ibvcq_ex: ibv extended CQ
 * @attr: attributes (not used)
 *
 * Start polling batch of work completions. Return 0 on success, ENOENT when
 * no completions are available on CQ. And an error code on errors
 */
static int
irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr)
{
        struct irdma_ucq *iwucq;
        int ret;

        iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex);
        ret = pthread_spin_lock(&iwucq->lock);
        if (ret)
                return ret;

        ret = __irdma_upoll_cq(iwucq, 1, NULL);
        if (ret == 1)
                return 0;

        /* No Completions on CQ */
        if (!ret)
                ret = ENOENT;

        pthread_spin_unlock(&iwucq->lock);

        return ret;
}

/**
 * irdma_next_poll - verb_ex API callback to get next WC
 * @ibvcq_ex: ibv extended CQ
 *
 * Return 0 on success, ENOENT when no completions are available on CQ.
969 * And an error code on errors 970 */ 971 static int 972 irdma_next_poll(struct ibv_cq_ex *ibvcq_ex) 973 { 974 struct irdma_ucq *iwucq; 975 int ret; 976 977 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 978 ret = __irdma_upoll_cq(iwucq, 1, NULL); 979 if (ret == 1) 980 return 0; 981 982 /* No Completions on CQ */ 983 if (!ret) 984 ret = ENOENT; 985 986 return ret; 987 } 988 989 /** 990 * irdma_end_poll - verb_ex API callback to end polling of WC's 991 * @ibvcq_ex: ibv extended CQ 992 */ 993 static void 994 irdma_end_poll(struct ibv_cq_ex *ibvcq_ex) 995 { 996 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 997 verbs_cq.cq_ex); 998 999 pthread_spin_unlock(&iwucq->lock); 1000 } 1001 1002 static enum ibv_wc_opcode 1003 irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) 1004 { 1005 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1006 verbs_cq.cq_ex); 1007 1008 switch (iwucq->cur_cqe.op_type) { 1009 case IRDMA_OP_TYPE_RDMA_WRITE: 1010 case IRDMA_OP_TYPE_RDMA_WRITE_SOL: 1011 return IBV_WC_RDMA_WRITE; 1012 case IRDMA_OP_TYPE_RDMA_READ: 1013 return IBV_WC_RDMA_READ; 1014 case IRDMA_OP_TYPE_SEND_SOL: 1015 case IRDMA_OP_TYPE_SEND_SOL_INV: 1016 case IRDMA_OP_TYPE_SEND_INV: 1017 case IRDMA_OP_TYPE_SEND: 1018 return IBV_WC_SEND; 1019 case IRDMA_OP_TYPE_BIND_MW: 1020 return IBV_WC_BIND_MW; 1021 case IRDMA_OP_TYPE_REC: 1022 return IBV_WC_RECV; 1023 case IRDMA_OP_TYPE_REC_IMM: 1024 return IBV_WC_RECV_RDMA_WITH_IMM; 1025 case IRDMA_OP_TYPE_INV_STAG: 1026 return IBV_WC_LOCAL_INV; 1027 } 1028 1029 printf("%s: Invalid opcode = %d in CQE\n", __func__, 1030 iwucq->cur_cqe.op_type); 1031 1032 return 0; 1033 } 1034 1035 static uint32_t irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex){ 1036 struct irdma_cq_poll_info *cur_cqe; 1037 struct irdma_ucq *iwucq; 1038 1039 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1040 cur_cqe = &iwucq->cur_cqe; 1041 1042 return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0; 1043 } 1044 1045 static int 1046 irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) 1047 { 1048 struct irdma_cq_poll_info *cur_cqe; 1049 struct irdma_ucq *iwucq; 1050 struct irdma_qp_uk *qp; 1051 struct ibv_qp *ib_qp; 1052 int wc_flags = 0; 1053 1054 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1055 cur_cqe = &iwucq->cur_cqe; 1056 qp = cur_cqe->qp_handle; 1057 ib_qp = qp->back_qp; 1058 1059 if (cur_cqe->imm_valid) 1060 wc_flags |= IBV_WC_WITH_IMM; 1061 1062 if (ib_qp->qp_type == IBV_QPT_UD) { 1063 wc_flags |= IBV_WC_GRH; 1064 } else { 1065 if (cur_cqe->stag_invalid_set) { 1066 switch (cur_cqe->op_type) { 1067 case IRDMA_OP_TYPE_REC: 1068 wc_flags |= IBV_WC_WITH_INV; 1069 break; 1070 case IRDMA_OP_TYPE_REC_IMM: 1071 wc_flags |= IBV_WC_WITH_INV; 1072 break; 1073 } 1074 } 1075 } 1076 1077 return wc_flags; 1078 } 1079 1080 static uint32_t irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex){ 1081 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1082 verbs_cq.cq_ex); 1083 1084 return iwucq->cur_cqe.bytes_xfered; 1085 } 1086 1087 static __be32 irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex){ 1088 struct irdma_cq_poll_info *cur_cqe; 1089 struct irdma_ucq *iwucq; 1090 1091 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1092 cur_cqe = &iwucq->cur_cqe; 1093 1094 return cur_cqe->imm_valid ? 
htonl(cur_cqe->imm_data) : 0; 1095 } 1096 1097 static uint32_t irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex){ 1098 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1099 verbs_cq.cq_ex); 1100 1101 return iwucq->cur_cqe.qp_id; 1102 } 1103 1104 static uint32_t irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex){ 1105 struct irdma_cq_poll_info *cur_cqe; 1106 struct irdma_ucq *iwucq; 1107 struct irdma_qp_uk *qp; 1108 struct ibv_qp *ib_qp; 1109 1110 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1111 cur_cqe = &iwucq->cur_cqe; 1112 qp = cur_cqe->qp_handle; 1113 ib_qp = qp->back_qp; 1114 1115 return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn : cur_cqe->qp_id; 1116 } 1117 1118 static uint8_t irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex){ 1119 return 0; 1120 } 1121 1122 void 1123 irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq, 1124 struct ibv_cq_init_attr_ex *attr_ex) 1125 { 1126 struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; 1127 1128 ibvcq_ex->start_poll = irdma_start_poll; 1129 ibvcq_ex->end_poll = irdma_end_poll; 1130 ibvcq_ex->next_poll = irdma_next_poll; 1131 1132 ibvcq_ex->read_opcode = irdma_wc_read_opcode; 1133 ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err; 1134 ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags; 1135 1136 if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) 1137 ibvcq_ex->read_byte_len = irdma_wc_read_byte_len; 1138 if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM) 1139 ibvcq_ex->read_imm_data = irdma_wc_read_imm_data; 1140 if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM) 1141 ibvcq_ex->read_qp_num = irdma_wc_read_qp_num; 1142 if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP) 1143 ibvcq_ex->read_src_qp = irdma_wc_read_src_qp; 1144 if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL) 1145 ibvcq_ex->read_sl = irdma_wc_read_sl; 1146 } 1147 1148 /** 1149 * irdma_arm_cq - arm of cq 1150 * @iwucq: cq to which arm 1151 * @cq_notify: notification params 1152 */ 1153 static void 1154 irdma_arm_cq(struct irdma_ucq *iwucq, 1155 enum irdma_cmpl_notify cq_notify) 1156 { 1157 iwucq->is_armed = true; 1158 iwucq->arm_sol = true; 1159 iwucq->skip_arm = false; 1160 iwucq->skip_sol = true; 1161 irdma_uk_cq_request_notification(&iwucq->cq, cq_notify); 1162 } 1163 1164 /** 1165 * irdma_uarm_cq - callback for arm of cq 1166 * @cq: cq to arm 1167 * @solicited: to get notify params 1168 */ 1169 int 1170 irdma_uarm_cq(struct ibv_cq *cq, int solicited) 1171 { 1172 struct irdma_ucq *iwucq; 1173 enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; 1174 int ret; 1175 1176 iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); 1177 if (solicited) 1178 cq_notify = IRDMA_CQ_COMPL_SOLICITED; 1179 1180 ret = pthread_spin_lock(&iwucq->lock); 1181 if (ret) 1182 return ret; 1183 1184 if (iwucq->is_armed) { 1185 if (iwucq->arm_sol && !solicited) { 1186 irdma_arm_cq(iwucq, cq_notify); 1187 } else { 1188 iwucq->skip_arm = true; 1189 iwucq->skip_sol = solicited ? 
true : false;
                }
        } else {
                irdma_arm_cq(iwucq, cq_notify);
        }

        pthread_spin_unlock(&iwucq->lock);

        return 0;
}

/**
 * irdma_cq_event - cq to do completion event
 * @cq: cq to arm
 */
void
irdma_cq_event(struct ibv_cq *cq)
{
        struct irdma_ucq *iwucq;

        iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
        if (pthread_spin_lock(&iwucq->lock))
                return;

        if (iwucq->skip_arm)
                irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT);
        else
                iwucq->is_armed = false;

        pthread_spin_unlock(&iwucq->lock);
}

void *
irdma_mmap(int fd, off_t offset)
{
        void *map;

        map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
                   fd, offset);
        if (map == MAP_FAILED)
                return map;

        if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) {
                munmap(map, IRDMA_HW_PAGE_SIZE);
                return MAP_FAILED;
        }

        return map;
}

void
irdma_munmap(void *map)
{
        ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE);
        munmap(map, IRDMA_HW_PAGE_SIZE);
}

/**
 * irdma_destroy_vmapped_qp - destroy resources for qp
 * @iwuqp: qp struct for resources
 */
static int
irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp)
{
        int ret;

        ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp);
        if (ret)
                return ret;

        if (iwuqp->qp.push_db)
                irdma_munmap(iwuqp->qp.push_db);
        if (iwuqp->qp.push_wqe)
                irdma_munmap(iwuqp->qp.push_wqe);

        ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);

        return 0;
}

/**
 * irdma_vmapped_qp - create resources for qp
 * @iwuqp: qp struct for resources
 * @pd: pd for the qp
 * @attr: attributes of qp passed
 * @info: uk info for initializing user level qp
 * @legacy_mode: indicates legacy ABI mode of the user context
 */
static int
irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd,
                 struct ibv_qp_init_attr *attr,
                 struct irdma_qp_uk_init_info *info,
                 bool legacy_mode)
{
        struct irdma_ucreate_qp cmd = {};
        size_t sqsize, rqsize, totalqpsize;
        struct irdma_ucreate_qp_resp resp = {};
        struct irdma_ureg_mr reg_mr_cmd = {};
        struct ibv_reg_mr_resp reg_mr_resp = {};
        int ret;

        sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
        rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
        totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE;
        info->sq = irdma_alloc_hw_buf(totalqpsize);
        iwuqp->buf_size = totalqpsize;

        if (!info->sq)
                return ENOMEM;

        memset(info->sq, 0, totalqpsize);
        info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE];
        info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem;

        reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP;
        reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT;
        reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT;

        ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize,
                             (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE,
                             &iwuqp->vmr.ibv_mr, &reg_mr_cmd.ibv_cmd,
                             sizeof(reg_mr_cmd), &reg_mr_resp,
                             sizeof(reg_mr_resp));
        if (ret)
                goto err_dereg_mr;

        cmd.user_wqe_bufs = (__u64) ((uintptr_t)info->sq);
        cmd.user_compl_ctx = (__u64) (uintptr_t)&iwuqp->qp;
        cmd.comp_mask |= IRDMA_CREATE_QP_USE_START_WQE_IDX;

        ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd,
                                sizeof(cmd), &resp.ibv_resp,
                                sizeof(struct irdma_ucreate_qp_resp));
        if (ret)
                goto err_qp;

        info->sq_size = resp.actual_sq_size;
        info->rq_size = resp.actual_rq_size;
        info->first_sq_wq = legacy_mode ? 1 : resp.lsmm;
        if (resp.comp_mask & IRDMA_CREATE_QP_USE_START_WQE_IDX)
                info->start_wqe_idx = resp.start_wqe_idx;
        info->qp_caps = resp.qp_caps;
        info->qp_id = resp.qp_id;
        iwuqp->irdma_drv_opt = resp.irdma_drv_opt;
        iwuqp->ibv_qp.qp_num = resp.qp_id;

        iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq,
                                      verbs_cq.cq);
        iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq,
                                      verbs_cq.cq);
        iwuqp->send_cq->uqp = iwuqp;
        iwuqp->recv_cq->uqp = iwuqp;

        return 0;
err_qp:
        ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);
err_dereg_mr:
        printf("%s: failed to create QP, status %d\n", __func__, ret);
        irdma_free_hw_buf(info->sq, iwuqp->buf_size);
        return ret;
}
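
/*
 * Worked example for irdma_vmapped_qp() above (illustrative only; it assumes
 * IRDMA_HW_PAGE_SIZE is 4096 and IRDMA_QP_WQE_MIN_SIZE is 32, both defined
 * outside this file): for info->sq_depth = 128 and info->rq_depth = 64,
 *
 *      sqsize      = roundup(128 * 32, 4096) = 4096
 *      rqsize      = roundup(64 * 32, 4096)  = 4096
 *      totalqpsize = 4096 + 4096 + IRDMA_DB_SHADOW_AREA_SIZE
 *
 * and the single buffer returned by irdma_alloc_hw_buf() is laid out as
 *
 *      info->sq -> | SQ WQEs | RQ WQEs | DB shadow area |
 *
 * so that one IRDMA_MEMREG_TYPE_QP registration covers all three regions.
 */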

/**
 * irdma_ucreate_qp - create qp on user app
 * @pd: pd for the qp
 * @attr: attributes of the qp to be created (sizes, sge, cq)
 */
struct ibv_qp *
irdma_ucreate_qp(struct ibv_pd *pd,
                 struct ibv_qp_init_attr *attr)
{
        struct irdma_qp_uk_init_info info = {};
        struct irdma_uk_attrs *uk_attrs;
        struct irdma_uvcontext *iwvctx;
        struct irdma_uqp *iwuqp;
        int status;

        if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) {
                printf("%s: failed to create QP, unsupported QP type: 0x%x\n",
                       __func__, attr->qp_type);
                errno = EOPNOTSUPP;
                return NULL;
        }

        iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx);
        uk_attrs = &iwvctx->uk_attrs;

        if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags ||
            attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags ||
            attr->cap.max_send_wr > uk_attrs->max_hw_wq_quanta ||
            attr->cap.max_recv_wr > uk_attrs->max_hw_rq_quanta ||
            attr->cap.max_inline_data > uk_attrs->max_hw_inline) {
                errno = EINVAL;
                return NULL;
        }

        info.uk_attrs = uk_attrs;
        info.sq_size = attr->cap.max_send_wr;
        info.rq_size = attr->cap.max_recv_wr;
        info.max_sq_frag_cnt = attr->cap.max_send_sge;
        info.max_rq_frag_cnt = attr->cap.max_recv_sge;
        info.max_inline_data = attr->cap.max_inline_data;
        info.abi_ver = iwvctx->abi_ver;

        status = irdma_uk_calc_depth_shift_sq(&info, &info.sq_depth, &info.sq_shift);
        if (status) {
                printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n",
                       __func__, attr->cap.max_send_wr, attr->cap.max_send_sge,
                       attr->cap.max_inline_data);
                errno = status;
                return NULL;
        }

        status = irdma_uk_calc_depth_shift_rq(&info, &info.rq_depth, &info.rq_shift);
        if (status) {
                printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n",
                       __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge);
                errno = status;
                return NULL;
        }

        iwuqp = memalign(1024, sizeof(*iwuqp));
        if (!iwuqp)
                return NULL;

        memset(iwuqp, 0, sizeof(*iwuqp));

        status = pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE);
        if (status)
                goto err_free_qp;

        info.sq_size = info.sq_depth >> info.sq_shift;
        info.rq_size = info.rq_depth >> info.rq_shift;
        /**
         * Maintain backward compatibility with older ABI which pass sq
1425 * and rq depth (in quanta) in cap.max_send_wr a cap.max_recv_wr 1426 */ 1427 if (!iwvctx->use_raw_attrs) { 1428 attr->cap.max_send_wr = info.sq_size; 1429 attr->cap.max_recv_wr = info.rq_size; 1430 } 1431 1432 info.wqe_alloc_db = (u32 *)iwvctx->db; 1433 info.legacy_mode = iwvctx->legacy_mode; 1434 info.sq_wrtrk_array = calloc(info.sq_depth, sizeof(*info.sq_wrtrk_array)); 1435 if (!info.sq_wrtrk_array) { 1436 status = errno; /* preserve errno */ 1437 goto err_destroy_lock; 1438 } 1439 1440 info.rq_wrid_array = calloc(info.rq_depth, sizeof(*info.rq_wrid_array)); 1441 if (!info.rq_wrid_array) { 1442 status = errno; /* preserve errno */ 1443 goto err_free_sq_wrtrk; 1444 } 1445 1446 iwuqp->sq_sig_all = attr->sq_sig_all; 1447 iwuqp->qp_type = attr->qp_type; 1448 status = irdma_vmapped_qp(iwuqp, pd, attr, &info, iwvctx->legacy_mode); 1449 if (status) 1450 goto err_free_rq_wrid; 1451 1452 iwuqp->qp.back_qp = iwuqp; 1453 iwuqp->qp.lock = &iwuqp->lock; 1454 1455 status = irdma_uk_qp_init(&iwuqp->qp, &info); 1456 if (status) 1457 goto err_free_vmap_qp; 1458 1459 attr->cap.max_send_wr = (info.sq_depth - IRDMA_SQ_RSVD) >> info.sq_shift; 1460 attr->cap.max_recv_wr = (info.rq_depth - IRDMA_RQ_RSVD) >> info.rq_shift; 1461 1462 return &iwuqp->ibv_qp; 1463 1464 err_free_vmap_qp: 1465 irdma_destroy_vmapped_qp(iwuqp); 1466 irdma_free_hw_buf(info.sq, iwuqp->buf_size); 1467 err_free_rq_wrid: 1468 free(info.rq_wrid_array); 1469 err_free_sq_wrtrk: 1470 free(info.sq_wrtrk_array); 1471 err_destroy_lock: 1472 pthread_spin_destroy(&iwuqp->lock); 1473 err_free_qp: 1474 printf("%s: failed to create QP\n", __func__); 1475 free(iwuqp); 1476 1477 errno = status; 1478 return NULL; 1479 } 1480 1481 /** 1482 * irdma_uquery_qp - query qp for some attribute 1483 * @qp: qp for the attributes query 1484 * @attr: to return the attributes 1485 * @attr_mask: mask of what is query for 1486 * @init_attr: initial attributes during create_qp 1487 */ 1488 int 1489 irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, 1490 struct ibv_qp_init_attr *init_attr) 1491 { 1492 struct ibv_query_qp cmd; 1493 1494 return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, 1495 sizeof(cmd)); 1496 } 1497 1498 /** 1499 * irdma_umodify_qp - send qp modify to driver 1500 * @qp: qp to modify 1501 * @attr: attribute to modify 1502 * @attr_mask: mask of the attribute 1503 */ 1504 int 1505 irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) 1506 { 1507 struct irdma_umodify_qp_resp resp = {}; 1508 struct ibv_modify_qp cmd = {}; 1509 struct irdma_modify_qp_cmd cmd_ex = {}; 1510 struct irdma_uvcontext *iwvctx; 1511 struct irdma_uqp *iwuqp; 1512 1513 iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); 1514 iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx); 1515 1516 if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE && 1517 iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) { 1518 u64 offset; 1519 void *map; 1520 int ret; 1521 1522 ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, 1523 sizeof(cmd_ex.ibv_cmd), 1524 sizeof(cmd_ex), &resp.ibv_resp, 1525 sizeof(resp.ibv_resp), 1526 sizeof(resp)); 1527 if (!ret) 1528 iwuqp->qp.rd_fence_rate = resp.rd_fence_rate; 1529 if (ret || !resp.push_valid) 1530 return ret; 1531 1532 if (iwuqp->qp.push_wqe) 1533 return ret; 1534 1535 offset = resp.push_wqe_mmap_key; 1536 map = irdma_mmap(qp->context->cmd_fd, offset); 1537 if (map == MAP_FAILED) 1538 return ret; 1539 1540 iwuqp->qp.push_wqe = map; 1541 1542 offset = 
resp.push_db_mmap_key; 1543 map = irdma_mmap(qp->context->cmd_fd, offset); 1544 if (map == MAP_FAILED) { 1545 irdma_munmap(iwuqp->qp.push_wqe); 1546 iwuqp->qp.push_wqe = NULL; 1547 printf("failed to map push page, errno %d\n", errno); 1548 return ret; 1549 } 1550 iwuqp->qp.push_wqe += resp.push_offset; 1551 iwuqp->qp.push_db = map + resp.push_offset; 1552 1553 return ret; 1554 } else { 1555 return ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); 1556 } 1557 } 1558 1559 static void 1560 irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) 1561 { 1562 struct irdma_umodify_qp_resp resp = {}; 1563 struct irdma_modify_qp_cmd cmd_ex = {}; 1564 struct ibv_qp_attr attr = {}; 1565 1566 attr.qp_state = IBV_QPS_ERR; 1567 cmd_ex.sq_flush = sq_flush; 1568 cmd_ex.rq_flush = rq_flush; 1569 1570 ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, 1571 &cmd_ex.ibv_cmd, 1572 sizeof(cmd_ex.ibv_cmd), 1573 sizeof(cmd_ex), &resp.ibv_resp, 1574 sizeof(resp.ibv_resp), 1575 sizeof(resp)); 1576 } 1577 1578 /** 1579 * irdma_clean_cqes - clean cq entries for qp 1580 * @qp: qp for which completions are cleaned 1581 * @iwcq: cq to be cleaned 1582 */ 1583 static void 1584 irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq) 1585 { 1586 struct irdma_cq_uk *ukcq = &iwucq->cq; 1587 int ret; 1588 1589 ret = pthread_spin_lock(&iwucq->lock); 1590 if (ret) 1591 return; 1592 1593 irdma_uk_clean_cq(qp, ukcq); 1594 pthread_spin_unlock(&iwucq->lock); 1595 } 1596 1597 /** 1598 * irdma_udestroy_qp - destroy qp 1599 * @qp: qp to destroy 1600 */ 1601 int 1602 irdma_udestroy_qp(struct ibv_qp *qp) 1603 { 1604 struct irdma_uqp *iwuqp; 1605 int ret; 1606 1607 iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); 1608 ret = pthread_spin_destroy(&iwuqp->lock); 1609 if (ret) 1610 goto err; 1611 1612 ret = irdma_destroy_vmapped_qp(iwuqp); 1613 if (ret) 1614 goto err; 1615 1616 /* Clean any pending completions from the cq(s) */ 1617 if (iwuqp->send_cq) 1618 irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq); 1619 1620 if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) 1621 irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); 1622 1623 if (iwuqp->qp.sq_wrtrk_array) 1624 free(iwuqp->qp.sq_wrtrk_array); 1625 if (iwuqp->qp.rq_wrid_array) 1626 free(iwuqp->qp.rq_wrid_array); 1627 1628 irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size); 1629 free(iwuqp); 1630 return 0; 1631 1632 err: 1633 printf("%s: failed to destroy QP, status %d\n", 1634 __func__, ret); 1635 return ret; 1636 } 1637 1638 /** 1639 * calc_type2_mw_stag - calculate type 2 MW stag 1640 * @rkey: desired rkey of the MW 1641 * @mw_rkey: type2 memory window rkey 1642 * 1643 * compute type2 memory window stag by taking lower 8 bits 1644 * of the desired rkey and leaving 24 bits if mw->rkey unchanged 1645 */ 1646 static inline u32 calc_type2_mw_stag(u32 rkey, u32 mw_rkey) { 1647 const u32 mask = 0xff; 1648 1649 return (rkey & mask) | (mw_rkey & ~mask); 1650 } 1651 1652 /** 1653 * irdma_post_send - post send wr for user application 1654 * @ib_qp: qp to post wr 1655 * @ib_wr: work request ptr 1656 * @bad_wr: return of bad wr if err 1657 */ 1658 int 1659 irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, 1660 struct ibv_send_wr **bad_wr) 1661 { 1662 struct irdma_post_sq_info info; 1663 struct irdma_uvcontext *iwvctx; 1664 struct irdma_uk_attrs *uk_attrs; 1665 struct irdma_uqp *iwuqp; 1666 bool reflush = false; 1667 int err = 0; 1668 1669 iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); 1670 iwvctx = container_of(ib_qp->context, struct 
irdma_uvcontext, ibv_ctx); 1671 uk_attrs = &iwvctx->uk_attrs; 1672 1673 err = pthread_spin_lock(&iwuqp->lock); 1674 if (err) 1675 return err; 1676 1677 if (!IRDMA_RING_MORE_WORK(iwuqp->qp.sq_ring) && 1678 ib_qp->state == IBV_QPS_ERR) 1679 reflush = true; 1680 1681 while (ib_wr) { 1682 memset(&info, 0, sizeof(info)); 1683 info.wr_id = (u64)(ib_wr->wr_id); 1684 if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || 1685 iwuqp->sq_sig_all) 1686 info.signaled = true; 1687 if (ib_wr->send_flags & IBV_SEND_FENCE) 1688 info.read_fence = true; 1689 1690 switch (ib_wr->opcode) { 1691 case IBV_WR_SEND_WITH_IMM: 1692 if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) { 1693 info.imm_data_valid = true; 1694 info.imm_data = ntohl(ib_wr->imm_data); 1695 } else { 1696 err = EINVAL; 1697 break; 1698 } 1699 /* fallthrough */ 1700 case IBV_WR_SEND: 1701 case IBV_WR_SEND_WITH_INV: 1702 if (ib_wr->opcode == IBV_WR_SEND || 1703 ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { 1704 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1705 info.op_type = IRDMA_OP_TYPE_SEND_SOL; 1706 else 1707 info.op_type = IRDMA_OP_TYPE_SEND; 1708 } else { 1709 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1710 info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV; 1711 else 1712 info.op_type = IRDMA_OP_TYPE_SEND_INV; 1713 info.stag_to_inv = ib_wr->imm_data; 1714 } 1715 info.op.send.num_sges = ib_wr->num_sge; 1716 info.op.send.sg_list = (struct ibv_sge *)ib_wr->sg_list; 1717 if (ib_qp->qp_type == IBV_QPT_UD) { 1718 struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah, 1719 struct irdma_uah, ibv_ah); 1720 1721 info.op.send.ah_id = ah->ah_id; 1722 info.op.send.qkey = ib_wr->wr.ud.remote_qkey; 1723 info.op.send.dest_qp = ib_wr->wr.ud.remote_qpn; 1724 } 1725 1726 if (ib_wr->send_flags & IBV_SEND_INLINE) 1727 err = irdma_uk_inline_send(&iwuqp->qp, &info, false); 1728 else 1729 err = irdma_uk_send(&iwuqp->qp, &info, false); 1730 break; 1731 case IBV_WR_RDMA_WRITE_WITH_IMM: 1732 if (iwuqp->qp.qp_caps & IRDMA_WRITE_WITH_IMM) { 1733 info.imm_data_valid = true; 1734 info.imm_data = ntohl(ib_wr->imm_data); 1735 } else { 1736 err = EINVAL; 1737 break; 1738 } 1739 /* fallthrough */ 1740 case IBV_WR_RDMA_WRITE: 1741 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1742 info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL; 1743 else 1744 info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; 1745 1746 info.op.rdma_write.num_lo_sges = ib_wr->num_sge; 1747 info.op.rdma_write.lo_sg_list = ib_wr->sg_list; 1748 info.op.rdma_write.rem_addr.addr = ib_wr->wr.rdma.remote_addr; 1749 info.op.rdma_write.rem_addr.lkey = ib_wr->wr.rdma.rkey; 1750 if (ib_wr->send_flags & IBV_SEND_INLINE) 1751 err = irdma_uk_inline_rdma_write(&iwuqp->qp, &info, false); 1752 else 1753 err = irdma_uk_rdma_write(&iwuqp->qp, &info, false); 1754 break; 1755 case IBV_WR_RDMA_READ: 1756 if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) { 1757 err = EINVAL; 1758 break; 1759 } 1760 info.op_type = IRDMA_OP_TYPE_RDMA_READ; 1761 info.op.rdma_read.rem_addr.addr = ib_wr->wr.rdma.remote_addr; 1762 info.op.rdma_read.rem_addr.lkey = ib_wr->wr.rdma.rkey; 1763 1764 info.op.rdma_read.lo_sg_list = ib_wr->sg_list; 1765 info.op.rdma_read.num_lo_sges = ib_wr->num_sge; 1766 err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false); 1767 break; 1768 case IBV_WR_BIND_MW: 1769 if (ib_qp->qp_type != IBV_QPT_RC) { 1770 err = EINVAL; 1771 break; 1772 } 1773 info.op_type = IRDMA_OP_TYPE_BIND_MW; 1774 info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey; 1775 if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { 1776 info.op.bind_window.mem_window_type_1 = true; 1777 
info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey; 1778 } else { 1779 struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); 1780 1781 if (vmr->access & IBV_ACCESS_ZERO_BASED) { 1782 err = EINVAL; 1783 break; 1784 } 1785 info.op.bind_window.mw_stag = 1786 calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey); 1787 ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag; 1788 1789 } 1790 1791 if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) { 1792 info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED; 1793 info.op.bind_window.va = NULL; 1794 } else { 1795 info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED; 1796 info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr; 1797 } 1798 info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length; 1799 info.op.bind_window.ena_reads = 1800 (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0; 1801 info.op.bind_window.ena_writes = 1802 (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 1 : 0; 1803 1804 err = irdma_uk_mw_bind(&iwuqp->qp, &info, false); 1805 break; 1806 case IBV_WR_LOCAL_INV: 1807 info.op_type = IRDMA_OP_TYPE_INV_STAG; 1808 info.op.inv_local_stag.target_stag = ib_wr->imm_data; 1809 err = irdma_uk_stag_local_invalidate(&iwuqp->qp, &info, true); 1810 break; 1811 default: 1812 /* error */ 1813 err = EINVAL; 1814 printf("%s: post work request failed, invalid opcode: 0x%x\n", 1815 __func__, ib_wr->opcode); 1816 break; 1817 } 1818 if (err) 1819 break; 1820 1821 ib_wr = ib_wr->next; 1822 } 1823 1824 if (err) 1825 *bad_wr = ib_wr; 1826 1827 irdma_uk_qp_post_wr(&iwuqp->qp); 1828 if (reflush) 1829 irdma_issue_flush(ib_qp, 1, 0); 1830 1831 pthread_spin_unlock(&iwuqp->lock); 1832 1833 return err; 1834 } 1835 1836 /** 1837 * irdma_post_recv - post receive wr for user application 1838 * @ib_wr: work request for receive 1839 * @bad_wr: bad wr caused an error 1840 */ 1841 int 1842 irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, 1843 struct ibv_recv_wr **bad_wr) 1844 { 1845 struct irdma_post_rq_info post_recv = {}; 1846 struct irdma_uqp *iwuqp; 1847 bool reflush = false; 1848 int err = 0; 1849 1850 iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); 1851 err = pthread_spin_lock(&iwuqp->lock); 1852 if (err) 1853 return err; 1854 1855 if (!IRDMA_RING_MORE_WORK(iwuqp->qp.rq_ring) && 1856 ib_qp->state == IBV_QPS_ERR) 1857 reflush = true; 1858 1859 while (ib_wr) { 1860 if (ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt) { 1861 *bad_wr = ib_wr; 1862 err = EINVAL; 1863 goto error; 1864 } 1865 post_recv.num_sges = ib_wr->num_sge; 1866 post_recv.wr_id = ib_wr->wr_id; 1867 post_recv.sg_list = ib_wr->sg_list; 1868 err = irdma_uk_post_receive(&iwuqp->qp, &post_recv); 1869 if (err) { 1870 *bad_wr = ib_wr; 1871 goto error; 1872 } 1873 1874 if (reflush) 1875 irdma_issue_flush(ib_qp, 0, 1); 1876 1877 ib_wr = ib_wr->next; 1878 } 1879 error: 1880 pthread_spin_unlock(&iwuqp->lock); 1881 1882 return err; 1883 } 1884 1885 /** 1886 * irdma_ucreate_ah - create address handle associated with a pd 1887 * @ibpd: pd for the address handle 1888 * @attr: attributes of address handle 1889 */ 1890 struct ibv_ah * 1891 irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) 1892 { 1893 struct irdma_uah *ah; 1894 union ibv_gid sgid; 1895 struct irdma_ucreate_ah_resp resp = {}; 1896 int err; 1897 1898 if (ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, 1899 &sgid)) { 1900 fprintf(stderr, "irdma: Error from 
ibv_query_gid.\n");
                errno = ENOENT;
                return NULL;
        }

        ah = calloc(1, sizeof(*ah));
        if (!ah)
                return NULL;

        err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp,
                                sizeof(resp));
        if (err) {
                free(ah);
                errno = err;
                return NULL;
        }

        ah->ah_id = resp.ah_id;

        return &ah->ibv_ah;
}

/**
 * irdma_udestroy_ah - destroy the address handle
 * @ibah: address handle
 */
int
irdma_udestroy_ah(struct ibv_ah *ibah)
{
        struct irdma_uah *ah;
        int ret;

        ah = container_of(ibah, struct irdma_uah, ibv_ah);

        ret = ibv_cmd_destroy_ah(ibah);
        if (ret)
                return ret;

        free(ah);

        return 0;
}

/**
 * irdma_uattach_mcast - Attach qp to multicast group
 * @qp: The queue pair
 * @gid: The Global ID for multicast group
 * @lid: The Local ID
 */
int
irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
                    uint16_t lid)
{
        return ibv_cmd_attach_mcast(qp, gid, lid);
}

/**
 * irdma_udetach_mcast - Detach qp from multicast group
 * @qp: The queue pair
 * @gid: The Global ID for multicast group
 * @lid: The Local ID
 */
int
irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid,
                    uint16_t lid)
{
        return ibv_cmd_detach_mcast(qp, gid, lid);
}
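
/*
 * Illustrative usage sketch (not part of the provider): irdma_uresize_cq()
 * below is reached through the standard verbs call, e.g.
 *
 *      int err = ibv_resize_cq(cq, 1024);      // -> irdma_uresize_cq()
 *      if (err)
 *              fprintf(stderr, "resize failed: %d\n", err);
 *
 * The previous CQ buffer is kept on iwucq->resize_list and is only freed by
 * irdma_process_resize_list() once its remaining CQEs have been polled (or
 * the CQ is destroyed).
 */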

/**
 * irdma_uresize_cq - resizes a cq
 * @cq: cq to resize
 * @cqe: the number of cqes of the new cq
 */
int
irdma_uresize_cq(struct ibv_cq *cq, int cqe)
{
        struct irdma_uvcontext *iwvctx;
        struct irdma_uk_attrs *uk_attrs;
        struct irdma_uresize_cq cmd = {};
        struct ibv_resize_cq_resp resp = {};
        struct irdma_ureg_mr reg_mr_cmd = {};
        struct ibv_reg_mr_resp reg_mr_resp = {};
        struct irdma_cq_buf *cq_buf = NULL;
        struct irdma_cqe *cq_base = NULL;
        struct verbs_mr new_mr = {};
        struct irdma_ucq *iwucq;
        size_t cq_size;
        u32 cq_pages;
        int cqe_needed;
        int ret = 0;

        iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
        iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx);
        uk_attrs = &iwvctx->uk_attrs;

        if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
                return EOPNOTSUPP;

        if (cqe < uk_attrs->min_hw_cq_size || cqe > uk_attrs->max_hw_cq_size - 1)
                return EINVAL;

        cqe_needed = get_cq_size(cqe, uk_attrs->hw_rev);
        if (cqe_needed == iwucq->cq.cq_size)
                return 0;

        cq_size = get_cq_total_bytes(cqe_needed);
        cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT;
        cq_base = irdma_alloc_hw_buf(cq_size);
        if (!cq_base)
                return ENOMEM;

        memset(cq_base, 0, cq_size);

        cq_buf = malloc(sizeof(*cq_buf));
        if (!cq_buf) {
                ret = ENOMEM;
                goto err_buf;
        }

        new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
        reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
        reg_mr_cmd.cq_pages = cq_pages;

        ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
                             (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
                             &new_mr.ibv_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
                             &reg_mr_resp, sizeof(reg_mr_resp));
        if (ret)
                goto err_dereg_mr;

        ret = pthread_spin_lock(&iwucq->lock);
        if (ret)
                goto err_lock;

        cmd.user_cq_buffer = (__u64) ((uintptr_t)cq_base);
        ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
                                sizeof(cmd), &resp, sizeof(resp));
        if (ret)
                goto err_resize;

        memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
        cq_buf->vmr = iwucq->vmr;
        iwucq->vmr = new_mr;
        irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
        iwucq->verbs_cq.cq.cqe = cqe;
        LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list);

        pthread_spin_unlock(&iwucq->lock);

        return ret;

err_resize:
        pthread_spin_unlock(&iwucq->lock);
err_lock:
        ibv_cmd_dereg_mr(&new_mr.ibv_mr);
err_dereg_mr:
        free(cq_buf);
err_buf:
        fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret);
        irdma_free_hw_buf(cq_base, cq_size);
        return ret;
}
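
/*
 * Illustrative usage sketch (not part of the provider): applications do not
 * call the irdma_u*() entry points in this file directly; they go through
 * the generic verbs API, e.g. the send/poll path served by irdma_upost_send()
 * and irdma_upoll_cq().  "qp", "cq", "mr", "buf" and "len" are assumed to
 * have been set up on an irdma device beforehand.
 *
 *      struct ibv_sge sge = {
 *              .addr   = (uintptr_t)buf,
 *              .length = len,
 *              .lkey   = mr->lkey,
 *      };
 *      struct ibv_send_wr wr = {
 *              .wr_id      = 1,
 *              .sg_list    = &sge,
 *              .num_sge    = 1,
 *              .opcode     = IBV_WR_SEND,
 *              .send_flags = IBV_SEND_SIGNALED,
 *      };
 *      struct ibv_send_wr *bad_wr;
 *      struct ibv_wc wc;
 *
 *      if (ibv_post_send(qp, &wr, &bad_wr))    // -> irdma_upost_send()
 *              return;
 *      while (ibv_poll_cq(cq, 1, &wc) == 0)    // -> irdma_upoll_cq()
 *              ;
 *      if (wc.status != IBV_WC_SUCCESS)
 *              fprintf(stderr, "wc error: %d\n", wc.status);
 */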