1 /*- 2 * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB 3 * 4 * Copyright (C) 2019 - 2023 Intel Corporation 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the 10 * OpenFabrics.org BSD license below: 11 * 12 * Redistribution and use in source and binary forms, with or 13 * without modification, are permitted provided that the following 14 * conditions are met: 15 * 16 * - Redistributions of source code must retain the above 17 * copyright notice, this list of conditions and the following 18 * disclaimer. 19 * 20 * - Redistributions in binary form must reproduce the above 21 * copyright notice, this list of conditions and the following 22 * disclaimer in the documentation and/or other materials 23 * provided with the distribution. 24 * 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 32 * SOFTWARE. 33 */ 34 /*$FreeBSD$*/ 35 36 #include <config.h> 37 #include <stdlib.h> 38 #include <stdio.h> 39 #include <string.h> 40 #include <unistd.h> 41 #include <signal.h> 42 #include <errno.h> 43 #include <sys/param.h> 44 #include <sys/mman.h> 45 #include <netinet/in.h> 46 #include <sys/stat.h> 47 #include <fcntl.h> 48 #include <stdbool.h> 49 #include <infiniband/opcode.h> 50 51 #include "irdma_umain.h" 52 #include "abi.h" 53 54 static inline void 55 print_fw_ver(uint64_t fw_ver, char *str, size_t len) 56 { 57 uint16_t major, minor; 58 59 major = fw_ver >> 32 & 0xffff; 60 minor = fw_ver & 0xffff; 61 62 snprintf(str, len, "%d.%d", major, minor); 63 } 64 65 /** 66 * irdma_uquery_device_ex - query device attributes including extended properties 67 * @context: user context for the device 68 * @input: extensible input struct for ibv_query_device_ex verb 69 * @attr: extended device attribute struct 70 * @attr_size: size of extended device attribute struct 71 **/ 72 int 73 irdma_uquery_device_ex(struct ibv_context *context, 74 const struct ibv_query_device_ex_input *input, 75 struct ibv_device_attr_ex *attr, size_t attr_size) 76 { 77 struct irdma_query_device_ex cmd = {}; 78 struct irdma_query_device_ex_resp resp = {}; 79 uint64_t fw_ver; 80 int ret; 81 82 ret = ibv_cmd_query_device_ex(context, input, attr, attr_size, &fw_ver, 83 &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), 84 &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); 85 if (ret) 86 return ret; 87 88 print_fw_ver(fw_ver, attr->orig_attr.fw_ver, sizeof(attr->orig_attr.fw_ver)); 89 90 return 0; 91 } 92 93 /** 94 * irdma_uquery_device - call driver to query device for max resources 95 * @context: user context for the device 96 * @attr: where to save all the mx resources from the driver 97 **/ 98 int 99 irdma_uquery_device(struct ibv_context *context, struct ibv_device_attr *attr) 100 { 101 struct ibv_query_device cmd; 102 uint64_t fw_ver; 103 int ret; 104 105 ret = ibv_cmd_query_device(context, attr, &fw_ver, &cmd, sizeof(cmd)); 106 if (ret) 107 return ret; 108 109 print_fw_ver(fw_ver, attr->fw_ver, 
sizeof(attr->fw_ver)); 110 111 return 0; 112 } 113 114 /** 115 * irdma_uquery_port - get port attributes (msg size, lnk, mtu...) 116 * @context: user context of the device 117 * @port: port for the attributes 118 * @attr: to return port attributes 119 **/ 120 int 121 irdma_uquery_port(struct ibv_context *context, uint8_t port, 122 struct ibv_port_attr *attr) 123 { 124 struct ibv_query_port cmd; 125 126 return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd)); 127 } 128 129 /** 130 * irdma_ualloc_pd - allocates protection domain and return pd ptr 131 * @context: user context of the device 132 **/ 133 struct ibv_pd * 134 irdma_ualloc_pd(struct ibv_context *context) 135 { 136 struct ibv_alloc_pd cmd; 137 struct irdma_ualloc_pd_resp resp = {}; 138 struct irdma_upd *iwupd; 139 int err; 140 141 iwupd = calloc(1, sizeof(*iwupd)); 142 if (!iwupd) 143 return NULL; 144 145 err = ibv_cmd_alloc_pd(context, &iwupd->ibv_pd, &cmd, sizeof(cmd), 146 &resp.ibv_resp, sizeof(resp)); 147 if (err) 148 goto err_free; 149 150 iwupd->pd_id = resp.pd_id; 151 152 return &iwupd->ibv_pd; 153 154 err_free: 155 free(iwupd); 156 errno = err; 157 return NULL; 158 } 159 160 /** 161 * irdma_ufree_pd - free pd resources 162 * @pd: pd to free resources 163 */ 164 int 165 irdma_ufree_pd(struct ibv_pd *pd) 166 { 167 struct irdma_uvcontext *iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx); 168 struct irdma_upd *iwupd; 169 int ret; 170 171 iwupd = container_of(pd, struct irdma_upd, ibv_pd); 172 ret = ibv_cmd_dealloc_pd(pd); 173 if (ret) 174 return ret; 175 176 free(iwupd); 177 178 return 0; 179 } 180 181 /** 182 * irdma_ureg_mr - register user memory region 183 * @pd: pd for the mr 184 * @addr: user address of the memory region 185 * @length: length of the memory 186 * @hca_va: hca_va 187 * @access: access allowed on this mr 188 */ 189 struct ibv_mr * 190 irdma_ureg_mr(struct ibv_pd *pd, void *addr, size_t length, 191 int access) 192 { 193 struct verbs_mr *vmr; 194 struct irdma_ureg_mr cmd = {}; 195 struct ibv_reg_mr_resp resp; 196 int err; 197 198 vmr = malloc(sizeof(*vmr)); 199 if (!vmr) 200 return NULL; 201 202 cmd.reg_type = IRDMA_MEMREG_TYPE_MEM; 203 err = ibv_cmd_reg_mr(pd, addr, length, 204 (uintptr_t)addr, access, &vmr->ibv_mr, &cmd.ibv_cmd, 205 sizeof(cmd), &resp, sizeof(resp)); 206 if (err) { 207 free(vmr); 208 errno = err; 209 return NULL; 210 } 211 212 return &vmr->ibv_mr; 213 } 214 215 /* 216 * irdma_urereg_mr - re-register memory region @vmr: mr that was allocated @flags: bit mask to indicate which of the 217 * attr's of MR modified @pd: pd of the mr @addr: user address of the memory region @length: length of the memory 218 * @access: access allowed on this mr 219 */ 220 int 221 irdma_urereg_mr(struct verbs_mr *vmr, int flags, struct ibv_pd *pd, 222 void *addr, size_t length, int access) 223 { 224 struct irdma_urereg_mr cmd = {}; 225 struct ibv_rereg_mr_resp resp; 226 227 cmd.reg_type = IRDMA_MEMREG_TYPE_MEM; 228 return ibv_cmd_rereg_mr(&vmr->ibv_mr, flags, addr, length, (uintptr_t)addr, 229 access, pd, &cmd.ibv_cmd, sizeof(cmd), &resp, 230 sizeof(resp)); 231 } 232 233 /** 234 * irdma_udereg_mr - re-register memory region 235 * @mr: mr that was allocated 236 */ 237 int 238 irdma_udereg_mr(struct ibv_mr *mr) 239 { 240 struct verbs_mr *vmr; 241 int ret; 242 243 vmr = container_of(mr, struct verbs_mr, ibv_mr); 244 245 ret = ibv_cmd_dereg_mr(mr); 246 if (ret) 247 return ret; 248 249 return 0; 250 } 251 252 /** 253 * irdma_ualloc_mw - allocate memory window 254 * @pd: protection domain 255 * 
@type: memory window type 256 */ 257 struct ibv_mw * 258 irdma_ualloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) 259 { 260 struct ibv_mw *mw; 261 struct ibv_alloc_mw cmd; 262 struct ibv_alloc_mw_resp resp; 263 int err; 264 265 mw = calloc(1, sizeof(*mw)); 266 if (!mw) 267 return NULL; 268 269 err = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, 270 sizeof(resp)); 271 if (err) { 272 printf("%s: Failed to alloc memory window\n", 273 __func__); 274 free(mw); 275 errno = err; 276 return NULL; 277 } 278 279 return mw; 280 } 281 282 /** 283 * irdma_ubind_mw - bind a memory window 284 * @qp: qp to post WR 285 * @mw: memory window to bind 286 * @mw_bind: bind info 287 */ 288 int 289 irdma_ubind_mw(struct ibv_qp *qp, struct ibv_mw *mw, 290 struct ibv_mw_bind *mw_bind) 291 { 292 struct ibv_mw_bind_info *bind_info = &mw_bind->bind_info; 293 struct verbs_mr *vmr; 294 295 struct ibv_send_wr wr = {}; 296 struct ibv_send_wr *bad_wr; 297 int err; 298 299 if (!bind_info->mr && (bind_info->addr || bind_info->length)) 300 return EINVAL; 301 302 if (bind_info->mr) { 303 vmr = verbs_get_mr(bind_info->mr); 304 if (vmr->mr_type != IBV_MR_TYPE_MR) 305 return ENOTSUP; 306 307 if (vmr->access & IBV_ACCESS_ZERO_BASED) 308 return EINVAL; 309 310 if (mw->pd != bind_info->mr->pd) 311 return EPERM; 312 } 313 314 wr.opcode = IBV_WR_BIND_MW; 315 wr.bind_mw.bind_info = mw_bind->bind_info; 316 wr.bind_mw.mw = mw; 317 wr.bind_mw.rkey = ibv_inc_rkey(mw->rkey); 318 319 wr.wr_id = mw_bind->wr_id; 320 wr.send_flags = mw_bind->send_flags; 321 322 err = irdma_upost_send(qp, &wr, &bad_wr); 323 if (!err) 324 mw->rkey = wr.bind_mw.rkey; 325 326 return err; 327 } 328 329 /** 330 * irdma_udealloc_mw - deallocate memory window 331 * @mw: memory window to dealloc 332 */ 333 int 334 irdma_udealloc_mw(struct ibv_mw *mw) 335 { 336 int ret; 337 struct ibv_dealloc_mw cmd; 338 339 ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd)); 340 if (ret) 341 return ret; 342 free(mw); 343 344 return 0; 345 } 346 347 static void * 348 irdma_alloc_hw_buf(size_t size) 349 { 350 void *buf; 351 352 buf = memalign(IRDMA_HW_PAGE_SIZE, size); 353 354 if (!buf) 355 return NULL; 356 if (ibv_dontfork_range(buf, size)) { 357 free(buf); 358 return NULL; 359 } 360 361 return buf; 362 } 363 364 static void 365 irdma_free_hw_buf(void *buf, size_t size) 366 { 367 ibv_dofork_range(buf, size); 368 free(buf); 369 } 370 371 /** 372 * get_cq_size - returns actual cqe needed by HW 373 * @ncqe: minimum cqes requested by application 374 * @hw_rev: HW generation 375 * @cqe_64byte_ena: enable 64byte cqe 376 */ 377 static inline int 378 get_cq_size(int ncqe, u8 hw_rev, bool cqe_64byte_ena) 379 { 380 ncqe++; 381 382 /* Completions with immediate require 1 extra entry */ 383 if (!cqe_64byte_ena && hw_rev > IRDMA_GEN_1) 384 ncqe *= 2; 385 386 if (ncqe < IRDMA_U_MINCQ_SIZE) 387 ncqe = IRDMA_U_MINCQ_SIZE; 388 389 return ncqe; 390 } 391 392 static inline size_t get_cq_total_bytes(u32 cq_size, bool cqe_64byte_ena){ 393 if (cqe_64byte_ena) 394 return roundup(cq_size * sizeof(struct irdma_extended_cqe), IRDMA_HW_PAGE_SIZE); 395 else 396 return roundup(cq_size * sizeof(struct irdma_cqe), IRDMA_HW_PAGE_SIZE); 397 } 398 399 /** 400 * ucreate_cq - irdma util function to create a CQ 401 * @context: ibv context 402 * @attr_ex: CQ init attributes 403 * @ext_cq: flag to create an extendable or normal CQ 404 */ 405 static struct ibv_cq_ex * 406 ucreate_cq(struct ibv_context *context, 407 struct ibv_cq_init_attr_ex *attr_ex, 408 bool ext_cq) 409 { 410 struct irdma_cq_uk_init_info info = {}; 
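	/*
	 * Two buffer registrations may be issued below: one for the CQ ring
	 * itself (reg_mr_cmd/reg_mr_resp) and, when CQ resize is supported,
	 * a second one for the doorbell shadow area
	 * (reg_mr_shadow_cmd/reg_mr_shadow_resp).
	 */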
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct irdma_ucreate_cq_ex cmd = {};
	struct irdma_ucreate_cq_ex_resp resp = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	struct irdma_ureg_mr reg_mr_shadow_cmd = {};
	struct ibv_reg_mr_resp reg_mr_shadow_resp = {};
	struct irdma_uk_attrs *uk_attrs;
	struct irdma_uvcontext *iwvctx;
	struct irdma_ucq *iwucq;
	size_t total_size;
	u32 cq_pages;
	int ret, ncqe;
	u8 hw_rev;
	bool cqe_64byte_ena;

	iwvctx = container_of(context, struct irdma_uvcontext, ibv_ctx);
	uk_attrs = &iwvctx->uk_attrs;
	hw_rev = uk_attrs->hw_rev;

	if (ext_cq) {
		u32 supported_flags = IRDMA_STANDARD_WC_FLAGS_EX;

		if (hw_rev == IRDMA_GEN_1 || attr_ex->wc_flags & ~supported_flags) {
			errno = EOPNOTSUPP;
			return NULL;
		}
	}

	if (attr_ex->cqe < uk_attrs->min_hw_cq_size || attr_ex->cqe > uk_attrs->max_hw_cq_size - 1) {
		errno = EINVAL;
		return NULL;
	}

	/* save the cqe requested by application */
	ncqe = attr_ex->cqe;

	iwucq = calloc(1, sizeof(*iwucq));
	if (!iwucq)
		return NULL;

	ret = pthread_spin_init(&iwucq->lock, PTHREAD_PROCESS_PRIVATE);
	if (ret) {
		free(iwucq);
		errno = ret;
		return NULL;
	}

	cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? true : false;
	info.cq_size = get_cq_size(attr_ex->cqe, hw_rev, cqe_64byte_ena);
	iwucq->comp_vector = attr_ex->comp_vector;
	LIST_INIT(&iwucq->resize_list);
	total_size = get_cq_total_bytes(info.cq_size, cqe_64byte_ena);
	cq_pages = total_size >> IRDMA_HW_PAGE_SHIFT;

	if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE))
		total_size = (cq_pages << IRDMA_HW_PAGE_SHIFT) + IRDMA_DB_SHADOW_AREA_SIZE;

	iwucq->buf_size = total_size;
	info.cq_base = irdma_alloc_hw_buf(total_size);
	if (!info.cq_base) {
		ret = ENOMEM;
		goto err_cq_base;
	}

	memset(info.cq_base, 0, total_size);
	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
	reg_mr_cmd.cq_pages = cq_pages;

	ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.cq_base,
			     total_size, (uintptr_t)info.cq_base,
			     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr.ibv_mr,
			     &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
			     &reg_mr_resp, sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	iwucq->vmr.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

	if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) {
		info.shadow_area = irdma_alloc_hw_buf(IRDMA_DB_SHADOW_AREA_SIZE);
		if (!info.shadow_area) {
			ret = ENOMEM;
			goto err_alloc_shadow;
		}

		memset(info.shadow_area, 0, IRDMA_DB_SHADOW_AREA_SIZE);
		reg_mr_shadow_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
		reg_mr_shadow_cmd.cq_pages = 1;

		ret = ibv_cmd_reg_mr(&iwvctx->iwupd->ibv_pd, info.shadow_area,
				     IRDMA_DB_SHADOW_AREA_SIZE, (uintptr_t)info.shadow_area,
				     IBV_ACCESS_LOCAL_WRITE, &iwucq->vmr_shadow_area.ibv_mr,
				     &reg_mr_shadow_cmd.ibv_cmd, sizeof(reg_mr_shadow_cmd),
				     &reg_mr_shadow_resp, sizeof(reg_mr_shadow_resp));
		if (ret) {
			irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE);
			goto err_alloc_shadow;
		}

		iwucq->vmr_shadow_area.ibv_mr.pd = &iwvctx->iwupd->ibv_pd;

	} else {
		info.shadow_area = (__le64 *) ((u8 *)info.cq_base + (cq_pages << IRDMA_HW_PAGE_SHIFT));
	}

	attr_ex->cqe = info.cq_size;
	cmd.user_cq_buf = (__u64) ((uintptr_t)info.cq_base);
	cmd.user_shadow_area = (__u64) ((uintptr_t)info.shadow_area);

	ret =
ibv_cmd_create_cq_ex(context, attr_ex, &iwucq->verbs_cq.cq_ex, 521 &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp, 522 sizeof(resp.ibv_resp), sizeof(resp)); 523 attr_ex->cqe = ncqe; 524 if (ret) 525 goto err_create_cq; 526 527 if (ext_cq) 528 irdma_ibvcq_ex_fill_priv_funcs(iwucq, attr_ex); 529 info.cq_id = resp.cq_id; 530 /* Do not report the CQE's reserved for immediate and burned by HW */ 531 iwucq->verbs_cq.cq.cqe = ncqe; 532 if (cqe_64byte_ena) 533 info.avoid_mem_cflct = true; 534 info.cqe_alloc_db = (u32 *)((u8 *)iwvctx->db + IRDMA_DB_CQ_OFFSET); 535 irdma_uk_cq_init(&iwucq->cq, &info); 536 return &iwucq->verbs_cq.cq_ex; 537 538 err_create_cq: 539 if (iwucq->vmr_shadow_area.ibv_mr.handle) { 540 ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); 541 irdma_free_hw_buf(info.shadow_area, IRDMA_DB_SHADOW_AREA_SIZE); 542 } 543 err_alloc_shadow: 544 ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); 545 err_dereg_mr: 546 irdma_free_hw_buf(info.cq_base, total_size); 547 err_cq_base: 548 printf("%s: failed to initialize CQ\n", __func__); 549 pthread_spin_destroy(&iwucq->lock); 550 551 free(iwucq); 552 553 errno = ret; 554 return NULL; 555 } 556 557 struct ibv_cq * 558 irdma_ucreate_cq(struct ibv_context *context, int cqe, 559 struct ibv_comp_channel *channel, 560 int comp_vector) 561 { 562 struct ibv_cq_init_attr_ex attr_ex = { 563 .cqe = cqe, 564 .channel = channel, 565 .comp_vector = comp_vector, 566 }; 567 struct ibv_cq_ex *ibvcq_ex; 568 569 ibvcq_ex = ucreate_cq(context, &attr_ex, false); 570 571 return ibvcq_ex ? ibv_cq_ex_to_cq(ibvcq_ex) : NULL; 572 } 573 574 struct ibv_cq_ex * 575 irdma_ucreate_cq_ex(struct ibv_context *context, 576 struct ibv_cq_init_attr_ex *attr_ex) 577 { 578 return ucreate_cq(context, attr_ex, true); 579 } 580 581 /** 582 * irdma_free_cq_buf - free memory for cq buffer 583 * @cq_buf: cq buf to free 584 */ 585 static void 586 irdma_free_cq_buf(struct irdma_cq_buf *cq_buf) 587 { 588 ibv_cmd_dereg_mr(&cq_buf->vmr.ibv_mr); 589 irdma_free_hw_buf(cq_buf->cq.cq_base, cq_buf->buf_size); 590 free(cq_buf); 591 } 592 593 /** 594 * irdma_process_resize_list - process the cq list to remove buffers 595 * @iwucq: cq which owns the list 596 * @lcqe_buf: cq buf where the last cqe is found 597 */ 598 static int 599 irdma_process_resize_list(struct irdma_ucq *iwucq, 600 struct irdma_cq_buf *lcqe_buf) 601 { 602 struct irdma_cq_buf *cq_buf, *next; 603 int cq_cnt = 0; 604 605 LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { 606 if (cq_buf == lcqe_buf) 607 return cq_cnt; 608 609 LIST_REMOVE(cq_buf, list); 610 irdma_free_cq_buf(cq_buf); 611 cq_cnt++; 612 } 613 614 return cq_cnt; 615 } 616 617 /** 618 * irdma_udestroy_cq - destroys cq 619 * @cq: ptr to cq to be destroyed 620 */ 621 int 622 irdma_udestroy_cq(struct ibv_cq *cq) 623 { 624 struct irdma_uk_attrs *uk_attrs; 625 struct irdma_uvcontext *iwvctx; 626 struct irdma_ucq *iwucq; 627 int ret; 628 629 iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); 630 iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); 631 uk_attrs = &iwvctx->uk_attrs; 632 633 ret = pthread_spin_destroy(&iwucq->lock); 634 if (ret) 635 goto err; 636 637 irdma_process_resize_list(iwucq, NULL); 638 ret = ibv_cmd_destroy_cq(cq); 639 if (ret) 640 goto err; 641 642 ibv_cmd_dereg_mr(&iwucq->vmr.ibv_mr); 643 irdma_free_hw_buf(iwucq->cq.cq_base, iwucq->buf_size); 644 645 if (uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE) { 646 ibv_cmd_dereg_mr(&iwucq->vmr_shadow_area.ibv_mr); 647 irdma_free_hw_buf(iwucq->cq.shadow_area, 
IRDMA_DB_SHADOW_AREA_SIZE); 648 } 649 free(iwucq); 650 return 0; 651 652 err: 653 return ret; 654 } 655 656 static enum ibv_wc_status 657 irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode opcode) 658 { 659 switch (opcode) { 660 case FLUSH_PROT_ERR: 661 return IBV_WC_LOC_PROT_ERR; 662 case FLUSH_REM_ACCESS_ERR: 663 return IBV_WC_REM_ACCESS_ERR; 664 case FLUSH_LOC_QP_OP_ERR: 665 return IBV_WC_LOC_QP_OP_ERR; 666 case FLUSH_REM_OP_ERR: 667 return IBV_WC_REM_OP_ERR; 668 case FLUSH_LOC_LEN_ERR: 669 return IBV_WC_LOC_LEN_ERR; 670 case FLUSH_GENERAL_ERR: 671 return IBV_WC_WR_FLUSH_ERR; 672 case FLUSH_MW_BIND_ERR: 673 return IBV_WC_MW_BIND_ERR; 674 case FLUSH_REM_INV_REQ_ERR: 675 return IBV_WC_REM_INV_REQ_ERR; 676 case FLUSH_RETRY_EXC_ERR: 677 return IBV_WC_RETRY_EXC_ERR; 678 case FLUSH_FATAL_ERR: 679 default: 680 return IBV_WC_FATAL_ERR; 681 } 682 } 683 684 static inline void 685 set_ib_wc_op_sq(struct irdma_cq_poll_info *cur_cqe, struct ibv_wc *entry) 686 { 687 switch (cur_cqe->op_type) { 688 case IRDMA_OP_TYPE_RDMA_WRITE: 689 case IRDMA_OP_TYPE_RDMA_WRITE_SOL: 690 entry->opcode = IBV_WC_RDMA_WRITE; 691 break; 692 case IRDMA_OP_TYPE_RDMA_READ: 693 entry->opcode = IBV_WC_RDMA_READ; 694 break; 695 case IRDMA_OP_TYPE_SEND_SOL: 696 case IRDMA_OP_TYPE_SEND_SOL_INV: 697 case IRDMA_OP_TYPE_SEND_INV: 698 case IRDMA_OP_TYPE_SEND: 699 entry->opcode = IBV_WC_SEND; 700 break; 701 case IRDMA_OP_TYPE_BIND_MW: 702 entry->opcode = IBV_WC_BIND_MW; 703 break; 704 case IRDMA_OP_TYPE_INV_STAG: 705 entry->opcode = IBV_WC_LOCAL_INV; 706 break; 707 default: 708 entry->status = IBV_WC_GENERAL_ERR; 709 printf("%s: Invalid opcode = %d in CQE\n", 710 __func__, cur_cqe->op_type); 711 } 712 } 713 714 static inline void 715 set_ib_wc_op_rq(struct irdma_cq_poll_info *cur_cqe, 716 struct ibv_wc *entry, bool send_imm_support) 717 { 718 if (!send_imm_support) { 719 entry->opcode = cur_cqe->imm_valid ? IBV_WC_RECV_RDMA_WITH_IMM : 720 IBV_WC_RECV; 721 return; 722 } 723 switch (cur_cqe->op_type) { 724 case IBV_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: 725 case IBV_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: 726 entry->opcode = IBV_WC_RECV_RDMA_WITH_IMM; 727 break; 728 default: 729 entry->opcode = IBV_WC_RECV; 730 } 731 } 732 733 /** 734 * irdma_process_cqe_ext - process current cqe for extended CQ 735 * @cur_cqe - current cqe info 736 */ 737 static void 738 irdma_process_cqe_ext(struct irdma_cq_poll_info *cur_cqe) 739 { 740 struct irdma_ucq *iwucq = container_of(cur_cqe, struct irdma_ucq, cur_cqe); 741 struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; 742 743 ibvcq_ex->wr_id = cur_cqe->wr_id; 744 if (cur_cqe->error) 745 ibvcq_ex->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? 746 irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; 747 else 748 ibvcq_ex->status = IBV_WC_SUCCESS; 749 } 750 751 /** 752 * irdma_process_cqe - process current cqe info 753 * @entry - ibv_wc object to fill in for non-extended CQ 754 * @cur_cqe - current cqe info 755 */ 756 static void 757 irdma_process_cqe(struct ibv_wc *entry, struct irdma_cq_poll_info *cur_cqe) 758 { 759 struct irdma_qp_uk *qp; 760 struct ibv_qp *ib_qp; 761 762 entry->wc_flags = 0; 763 entry->wr_id = cur_cqe->wr_id; 764 entry->qp_num = cur_cqe->qp_id; 765 qp = cur_cqe->qp_handle; 766 ib_qp = qp->back_qp; 767 768 if (cur_cqe->error) { 769 entry->status = (cur_cqe->comp_status == IRDMA_COMPL_STATUS_FLUSHED) ? 
770 irdma_flush_err_to_ib_wc_status(cur_cqe->minor_err) : IBV_WC_GENERAL_ERR; 771 entry->vendor_err = cur_cqe->major_err << 16 | 772 cur_cqe->minor_err; 773 } else { 774 entry->status = IBV_WC_SUCCESS; 775 } 776 777 if (cur_cqe->imm_valid) { 778 entry->imm_data = htonl(cur_cqe->imm_data); 779 entry->wc_flags |= IBV_WC_WITH_IMM; 780 } 781 782 if (cur_cqe->q_type == IRDMA_CQE_QTYPE_SQ) { 783 set_ib_wc_op_sq(cur_cqe, entry); 784 } else { 785 set_ib_wc_op_rq(cur_cqe, entry, 786 qp->qp_caps & IRDMA_SEND_WITH_IMM ? 787 true : false); 788 if (ib_qp->qp_type != IBV_QPT_UD && 789 cur_cqe->stag_invalid_set) { 790 entry->invalidated_rkey = cur_cqe->inv_stag; 791 entry->wc_flags |= IBV_WC_WITH_INV; 792 } 793 } 794 795 if (ib_qp->qp_type == IBV_QPT_UD) { 796 entry->src_qp = cur_cqe->ud_src_qpn; 797 entry->wc_flags |= IBV_WC_GRH; 798 } else { 799 entry->src_qp = cur_cqe->qp_id; 800 } 801 entry->byte_len = cur_cqe->bytes_xfered; 802 } 803 804 /** 805 * irdma_poll_one - poll one entry of the CQ 806 * @ukcq: ukcq to poll 807 * @cur_cqe: current CQE info to be filled in 808 * @entry: ibv_wc object to be filled for non-extended CQ or NULL for extended CQ 809 * 810 * Returns the internal irdma device error code or 0 on success 811 */ 812 static int 813 irdma_poll_one(struct irdma_cq_uk *ukcq, struct irdma_cq_poll_info *cur_cqe, 814 struct ibv_wc *entry) 815 { 816 int ret = irdma_uk_cq_poll_cmpl(ukcq, cur_cqe); 817 818 if (ret) 819 return ret; 820 821 if (!entry) 822 irdma_process_cqe_ext(cur_cqe); 823 else 824 irdma_process_cqe(entry, cur_cqe); 825 826 return 0; 827 } 828 829 /** 830 * __irdma_upoll_cq - irdma util function to poll device CQ 831 * @iwucq: irdma cq to poll 832 * @num_entries: max cq entries to poll 833 * @entry: pointer to array of ibv_wc objects to be filled in for each completion or NULL if ext CQ 834 * 835 * Returns non-negative value equal to the number of completions 836 * found. On failure, EINVAL 837 */ 838 static int 839 __irdma_upoll_cq(struct irdma_ucq *iwucq, int num_entries, 840 struct ibv_wc *entry) 841 { 842 struct irdma_cq_buf *cq_buf, *next; 843 struct irdma_cq_buf *last_buf = NULL; 844 struct irdma_cq_poll_info *cur_cqe = &iwucq->cur_cqe; 845 bool cq_new_cqe = false; 846 int resized_bufs = 0; 847 int npolled = 0; 848 int ret; 849 850 /* go through the list of previously resized CQ buffers */ 851 LIST_FOREACH_SAFE(cq_buf, &iwucq->resize_list, list, next) { 852 while (npolled < num_entries) { 853 ret = irdma_poll_one(&cq_buf->cq, cur_cqe, 854 entry ? entry + npolled : NULL); 855 if (!ret) { 856 ++npolled; 857 cq_new_cqe = true; 858 continue; 859 } 860 if (ret == ENOENT) 861 break; 862 /* QP using the CQ is destroyed. Skip reporting this CQE */ 863 if (ret == EFAULT) { 864 cq_new_cqe = true; 865 continue; 866 } 867 goto error; 868 } 869 870 /* save the resized CQ buffer which received the last cqe */ 871 if (cq_new_cqe) 872 last_buf = cq_buf; 873 cq_new_cqe = false; 874 } 875 876 /* check the current CQ for new cqes */ 877 while (npolled < num_entries) { 878 ret = irdma_poll_one(&iwucq->cq, cur_cqe, 879 entry ? entry + npolled : NULL); 880 if (!ret) { 881 ++npolled; 882 cq_new_cqe = true; 883 continue; 884 } 885 if (ret == ENOENT) 886 break; 887 /* QP using the CQ is destroyed. 
Skip reporting this CQE */ 888 if (ret == EFAULT) { 889 cq_new_cqe = true; 890 continue; 891 } 892 goto error; 893 } 894 895 if (cq_new_cqe) 896 /* all previous CQ resizes are complete */ 897 resized_bufs = irdma_process_resize_list(iwucq, NULL); 898 else if (last_buf) 899 /* only CQ resizes up to the last_buf are complete */ 900 resized_bufs = irdma_process_resize_list(iwucq, last_buf); 901 if (resized_bufs) 902 /* report to the HW the number of complete CQ resizes */ 903 irdma_uk_cq_set_resized_cnt(&iwucq->cq, resized_bufs); 904 905 return npolled; 906 907 error: 908 printf("%s: Error polling CQ, irdma_err: %d\n", __func__, ret); 909 910 return EINVAL; 911 } 912 913 /** 914 * irdma_upoll_cq - verb API callback to poll device CQ 915 * @cq: ibv_cq to poll 916 * @num_entries: max cq entries to poll 917 * @entry: pointer to array of ibv_wc objects to be filled in for each completion 918 * 919 * Returns non-negative value equal to the number of completions 920 * found and a negative error code on failure 921 */ 922 int 923 irdma_upoll_cq(struct ibv_cq *cq, int num_entries, struct ibv_wc *entry) 924 { 925 struct irdma_ucq *iwucq; 926 int ret; 927 928 iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); 929 ret = pthread_spin_lock(&iwucq->lock); 930 if (ret) 931 return -ret; 932 933 ret = __irdma_upoll_cq(iwucq, num_entries, entry); 934 935 pthread_spin_unlock(&iwucq->lock); 936 937 return ret; 938 } 939 940 /** 941 * irdma_start_poll - verb_ex API callback to poll batch of WC's 942 * @ibvcq_ex: ibv extended CQ 943 * @attr: attributes (not used) 944 * 945 * Start polling batch of work completions. Return 0 on success, ENONENT when 946 * no completions are available on CQ. And an error code on errors 947 */ 948 static int 949 irdma_start_poll(struct ibv_cq_ex *ibvcq_ex, struct ibv_poll_cq_attr *attr) 950 { 951 struct irdma_ucq *iwucq; 952 int ret; 953 954 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 955 ret = pthread_spin_lock(&iwucq->lock); 956 if (ret) 957 return ret; 958 959 ret = __irdma_upoll_cq(iwucq, 1, NULL); 960 if (ret == 1) 961 return 0; 962 963 /* No Completions on CQ */ 964 if (!ret) 965 ret = ENOENT; 966 967 pthread_spin_unlock(&iwucq->lock); 968 969 return ret; 970 } 971 972 /** 973 * irdma_next_poll - verb_ex API callback to get next WC 974 * @ibvcq_ex: ibv extended CQ 975 * 976 * Return 0 on success, ENONENT when no completions are available on CQ. 
977 * And an error code on errors 978 */ 979 static int 980 irdma_next_poll(struct ibv_cq_ex *ibvcq_ex) 981 { 982 struct irdma_ucq *iwucq; 983 int ret; 984 985 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 986 ret = __irdma_upoll_cq(iwucq, 1, NULL); 987 if (ret == 1) 988 return 0; 989 990 /* No Completions on CQ */ 991 if (!ret) 992 ret = ENOENT; 993 994 return ret; 995 } 996 997 /** 998 * irdma_end_poll - verb_ex API callback to end polling of WC's 999 * @ibvcq_ex: ibv extended CQ 1000 */ 1001 static void 1002 irdma_end_poll(struct ibv_cq_ex *ibvcq_ex) 1003 { 1004 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1005 verbs_cq.cq_ex); 1006 1007 pthread_spin_unlock(&iwucq->lock); 1008 } 1009 1010 static enum ibv_wc_opcode 1011 irdma_wc_read_opcode(struct ibv_cq_ex *ibvcq_ex) 1012 { 1013 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1014 verbs_cq.cq_ex); 1015 1016 switch (iwucq->cur_cqe.op_type) { 1017 case IRDMA_OP_TYPE_RDMA_WRITE: 1018 case IRDMA_OP_TYPE_RDMA_WRITE_SOL: 1019 return IBV_WC_RDMA_WRITE; 1020 case IRDMA_OP_TYPE_RDMA_READ: 1021 return IBV_WC_RDMA_READ; 1022 case IRDMA_OP_TYPE_SEND_SOL: 1023 case IRDMA_OP_TYPE_SEND_SOL_INV: 1024 case IRDMA_OP_TYPE_SEND_INV: 1025 case IRDMA_OP_TYPE_SEND: 1026 return IBV_WC_SEND; 1027 case IRDMA_OP_TYPE_BIND_MW: 1028 return IBV_WC_BIND_MW; 1029 case IRDMA_OP_TYPE_REC: 1030 return IBV_WC_RECV; 1031 case IRDMA_OP_TYPE_REC_IMM: 1032 return IBV_WC_RECV_RDMA_WITH_IMM; 1033 case IRDMA_OP_TYPE_INV_STAG: 1034 return IBV_WC_LOCAL_INV; 1035 } 1036 1037 printf("%s: Invalid opcode = %d in CQE\n", __func__, 1038 iwucq->cur_cqe.op_type); 1039 1040 return 0; 1041 } 1042 1043 static uint32_t irdma_wc_read_vendor_err(struct ibv_cq_ex *ibvcq_ex){ 1044 struct irdma_cq_poll_info *cur_cqe; 1045 struct irdma_ucq *iwucq; 1046 1047 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1048 cur_cqe = &iwucq->cur_cqe; 1049 1050 return cur_cqe->error ? cur_cqe->major_err << 16 | cur_cqe->minor_err : 0; 1051 } 1052 1053 static int 1054 irdma_wc_read_wc_flags(struct ibv_cq_ex *ibvcq_ex) 1055 { 1056 struct irdma_cq_poll_info *cur_cqe; 1057 struct irdma_ucq *iwucq; 1058 struct irdma_qp_uk *qp; 1059 struct ibv_qp *ib_qp; 1060 int wc_flags = 0; 1061 1062 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1063 cur_cqe = &iwucq->cur_cqe; 1064 qp = cur_cqe->qp_handle; 1065 ib_qp = qp->back_qp; 1066 1067 if (cur_cqe->imm_valid) 1068 wc_flags |= IBV_WC_WITH_IMM; 1069 1070 if (ib_qp->qp_type == IBV_QPT_UD) { 1071 wc_flags |= IBV_WC_GRH; 1072 } else { 1073 if (cur_cqe->stag_invalid_set) { 1074 switch (cur_cqe->op_type) { 1075 case IRDMA_OP_TYPE_REC: 1076 wc_flags |= IBV_WC_WITH_INV; 1077 break; 1078 case IRDMA_OP_TYPE_REC_IMM: 1079 wc_flags |= IBV_WC_WITH_INV; 1080 break; 1081 } 1082 } 1083 } 1084 1085 return wc_flags; 1086 } 1087 1088 static uint32_t irdma_wc_read_byte_len(struct ibv_cq_ex *ibvcq_ex){ 1089 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1090 verbs_cq.cq_ex); 1091 1092 return iwucq->cur_cqe.bytes_xfered; 1093 } 1094 1095 static __be32 irdma_wc_read_imm_data(struct ibv_cq_ex *ibvcq_ex){ 1096 struct irdma_cq_poll_info *cur_cqe; 1097 struct irdma_ucq *iwucq; 1098 1099 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1100 cur_cqe = &iwucq->cur_cqe; 1101 1102 return cur_cqe->imm_valid ? 
htonl(cur_cqe->imm_data) : 0; 1103 } 1104 1105 static uint32_t irdma_wc_read_qp_num(struct ibv_cq_ex *ibvcq_ex){ 1106 struct irdma_ucq *iwucq = container_of(ibvcq_ex, struct irdma_ucq, 1107 verbs_cq.cq_ex); 1108 1109 return iwucq->cur_cqe.qp_id; 1110 } 1111 1112 static uint32_t irdma_wc_read_src_qp(struct ibv_cq_ex *ibvcq_ex){ 1113 struct irdma_cq_poll_info *cur_cqe; 1114 struct irdma_ucq *iwucq; 1115 struct irdma_qp_uk *qp; 1116 struct ibv_qp *ib_qp; 1117 1118 iwucq = container_of(ibvcq_ex, struct irdma_ucq, verbs_cq.cq_ex); 1119 cur_cqe = &iwucq->cur_cqe; 1120 qp = cur_cqe->qp_handle; 1121 ib_qp = qp->back_qp; 1122 1123 return ib_qp->qp_type == IBV_QPT_UD ? cur_cqe->ud_src_qpn : cur_cqe->qp_id; 1124 } 1125 1126 static uint8_t irdma_wc_read_sl(struct ibv_cq_ex *ibvcq_ex){ 1127 return 0; 1128 } 1129 1130 void 1131 irdma_ibvcq_ex_fill_priv_funcs(struct irdma_ucq *iwucq, 1132 struct ibv_cq_init_attr_ex *attr_ex) 1133 { 1134 struct ibv_cq_ex *ibvcq_ex = &iwucq->verbs_cq.cq_ex; 1135 1136 ibvcq_ex->start_poll = irdma_start_poll; 1137 ibvcq_ex->end_poll = irdma_end_poll; 1138 ibvcq_ex->next_poll = irdma_next_poll; 1139 1140 ibvcq_ex->read_opcode = irdma_wc_read_opcode; 1141 ibvcq_ex->read_vendor_err = irdma_wc_read_vendor_err; 1142 ibvcq_ex->read_wc_flags = irdma_wc_read_wc_flags; 1143 1144 if (attr_ex->wc_flags & IBV_WC_EX_WITH_BYTE_LEN) 1145 ibvcq_ex->read_byte_len = irdma_wc_read_byte_len; 1146 if (attr_ex->wc_flags & IBV_WC_EX_WITH_IMM) 1147 ibvcq_ex->read_imm_data = irdma_wc_read_imm_data; 1148 if (attr_ex->wc_flags & IBV_WC_EX_WITH_QP_NUM) 1149 ibvcq_ex->read_qp_num = irdma_wc_read_qp_num; 1150 if (attr_ex->wc_flags & IBV_WC_EX_WITH_SRC_QP) 1151 ibvcq_ex->read_src_qp = irdma_wc_read_src_qp; 1152 if (attr_ex->wc_flags & IBV_WC_EX_WITH_SL) 1153 ibvcq_ex->read_sl = irdma_wc_read_sl; 1154 } 1155 1156 /** 1157 * irdma_arm_cq - arm of cq 1158 * @iwucq: cq to which arm 1159 * @cq_notify: notification params 1160 */ 1161 static void 1162 irdma_arm_cq(struct irdma_ucq *iwucq, 1163 enum irdma_cmpl_notify cq_notify) 1164 { 1165 iwucq->is_armed = true; 1166 iwucq->arm_sol = true; 1167 iwucq->skip_arm = false; 1168 iwucq->skip_sol = true; 1169 irdma_uk_cq_request_notification(&iwucq->cq, cq_notify); 1170 } 1171 1172 /** 1173 * irdma_uarm_cq - callback for arm of cq 1174 * @cq: cq to arm 1175 * @solicited: to get notify params 1176 */ 1177 int 1178 irdma_uarm_cq(struct ibv_cq *cq, int solicited) 1179 { 1180 struct irdma_ucq *iwucq; 1181 enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT; 1182 int ret; 1183 1184 iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); 1185 if (solicited) 1186 cq_notify = IRDMA_CQ_COMPL_SOLICITED; 1187 1188 ret = pthread_spin_lock(&iwucq->lock); 1189 if (ret) 1190 return ret; 1191 1192 if (iwucq->is_armed) { 1193 if (iwucq->arm_sol && !solicited) { 1194 irdma_arm_cq(iwucq, cq_notify); 1195 } else { 1196 iwucq->skip_arm = true; 1197 iwucq->skip_sol = solicited ? 
true : false;
		}
	} else {
		irdma_arm_cq(iwucq, cq_notify);
	}

	pthread_spin_unlock(&iwucq->lock);

	return 0;
}

/**
 * irdma_cq_event - cq to do completion event
 * @cq: cq to arm
 */
void
irdma_cq_event(struct ibv_cq *cq)
{
	struct irdma_ucq *iwucq;

	iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq);
	if (pthread_spin_lock(&iwucq->lock))
		return;

	if (iwucq->skip_arm)
		irdma_arm_cq(iwucq, IRDMA_CQ_COMPL_EVENT);
	else
		iwucq->is_armed = false;

	pthread_spin_unlock(&iwucq->lock);
}

void *
irdma_mmap(int fd, off_t offset)
{
	void *map;

	map = mmap(NULL, IRDMA_HW_PAGE_SIZE, PROT_WRITE | PROT_READ, MAP_SHARED,
		   fd, offset);
	if (map == MAP_FAILED)
		return map;

	if (ibv_dontfork_range(map, IRDMA_HW_PAGE_SIZE)) {
		munmap(map, IRDMA_HW_PAGE_SIZE);
		return MAP_FAILED;
	}

	return map;
}

void
irdma_munmap(void *map)
{
	ibv_dofork_range(map, IRDMA_HW_PAGE_SIZE);
	munmap(map, IRDMA_HW_PAGE_SIZE);
}

/**
 * irdma_destroy_vmapped_qp - destroy resources for qp
 * @iwuqp: qp struct for resources
 */
static int
irdma_destroy_vmapped_qp(struct irdma_uqp *iwuqp)
{
	int ret;

	ret = ibv_cmd_destroy_qp(&iwuqp->ibv_qp);
	if (ret)
		return ret;

	if (iwuqp->qp.push_db)
		irdma_munmap(iwuqp->qp.push_db);
	if (iwuqp->qp.push_wqe)
		irdma_munmap(iwuqp->qp.push_wqe);

	ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr);

	return 0;
}

/**
 * irdma_vmapped_qp - create resources for qp
 * @iwuqp: qp struct for resources
 * @pd: pd for the qp
 * @attr: attributes of qp passed
 * @info: uk info for initializing user level qp
 * @legacy_mode: true if running in legacy ABI mode
 */
static int
irdma_vmapped_qp(struct irdma_uqp *iwuqp, struct ibv_pd *pd,
		 struct ibv_qp_init_attr *attr,
		 struct irdma_qp_uk_init_info *info,
		 bool legacy_mode)
{
	struct irdma_ucreate_qp cmd = {};
	size_t sqsize, rqsize, totalqpsize;
	struct irdma_ucreate_qp_resp resp = {};
	struct irdma_ureg_mr reg_mr_cmd = {};
	struct ibv_reg_mr_resp reg_mr_resp = {};
	int ret;

	sqsize = roundup(info->sq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
	rqsize = roundup(info->rq_depth * IRDMA_QP_WQE_MIN_SIZE, IRDMA_HW_PAGE_SIZE);
	totalqpsize = rqsize + sqsize + IRDMA_DB_SHADOW_AREA_SIZE;
	info->sq = irdma_alloc_hw_buf(totalqpsize);
	iwuqp->buf_size = totalqpsize;

	if (!info->sq)
		return ENOMEM;

	memset(info->sq, 0, totalqpsize);
	info->rq = &info->sq[sqsize / IRDMA_QP_WQE_MIN_SIZE];
	info->shadow_area = info->rq[rqsize / IRDMA_QP_WQE_MIN_SIZE].elem;

	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_QP;
	reg_mr_cmd.sq_pages = sqsize >> IRDMA_HW_PAGE_SHIFT;
	reg_mr_cmd.rq_pages = rqsize >> IRDMA_HW_PAGE_SHIFT;

	ret = ibv_cmd_reg_mr(pd, info->sq, totalqpsize,
			     (uintptr_t)info->sq, IBV_ACCESS_LOCAL_WRITE,
			     &iwuqp->vmr.ibv_mr, &reg_mr_cmd.ibv_cmd,
			     sizeof(reg_mr_cmd), &reg_mr_resp,
			     sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	cmd.user_wqe_bufs = (__u64) ((uintptr_t)info->sq);
	cmd.user_compl_ctx = (__u64) (uintptr_t)&iwuqp->qp;
	ret = ibv_cmd_create_qp(pd, &iwuqp->ibv_qp, attr, &cmd.ibv_cmd,
1327 sizeof(cmd), &resp.ibv_resp, 1328 sizeof(struct irdma_ucreate_qp_resp)); 1329 if (ret) 1330 goto err_qp; 1331 1332 info->sq_size = resp.actual_sq_size; 1333 info->rq_size = resp.actual_rq_size; 1334 info->first_sq_wq = legacy_mode ? 1 : resp.lsmm; 1335 info->qp_caps = resp.qp_caps; 1336 info->qp_id = resp.qp_id; 1337 iwuqp->irdma_drv_opt = resp.irdma_drv_opt; 1338 iwuqp->ibv_qp.qp_num = resp.qp_id; 1339 1340 iwuqp->send_cq = container_of(attr->send_cq, struct irdma_ucq, 1341 verbs_cq.cq); 1342 iwuqp->recv_cq = container_of(attr->recv_cq, struct irdma_ucq, 1343 verbs_cq.cq); 1344 iwuqp->send_cq->uqp = iwuqp; 1345 iwuqp->recv_cq->uqp = iwuqp; 1346 1347 return 0; 1348 err_qp: 1349 ibv_cmd_dereg_mr(&iwuqp->vmr.ibv_mr); 1350 err_dereg_mr: 1351 printf("%s: failed to create QP, status %d\n", __func__, ret); 1352 irdma_free_hw_buf(info->sq, iwuqp->buf_size); 1353 return ret; 1354 } 1355 1356 /** 1357 * irdma_ucreate_qp - create qp on user app 1358 * @pd: pd for the qp 1359 * @attr: attributes of the qp to be created (sizes, sge, cq) 1360 */ 1361 struct ibv_qp * 1362 irdma_ucreate_qp(struct ibv_pd *pd, 1363 struct ibv_qp_init_attr *attr) 1364 { 1365 struct irdma_qp_uk_init_info info = {}; 1366 struct irdma_uk_attrs *uk_attrs; 1367 struct irdma_uvcontext *iwvctx; 1368 struct irdma_uqp *iwuqp; 1369 int status; 1370 1371 if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_UD) { 1372 printf("%s: failed to create QP, unsupported QP type: 0x%x\n", 1373 __func__, attr->qp_type); 1374 errno = EOPNOTSUPP; 1375 return NULL; 1376 } 1377 1378 iwvctx = container_of(pd->context, struct irdma_uvcontext, ibv_ctx); 1379 uk_attrs = &iwvctx->uk_attrs; 1380 1381 if (attr->cap.max_send_sge > uk_attrs->max_hw_wq_frags || 1382 attr->cap.max_recv_sge > uk_attrs->max_hw_wq_frags || 1383 attr->cap.max_inline_data > uk_attrs->max_hw_inline) { 1384 errno = EINVAL; 1385 return NULL; 1386 } 1387 1388 info.uk_attrs = uk_attrs; 1389 info.sq_size = attr->cap.max_send_wr; 1390 info.rq_size = attr->cap.max_recv_wr; 1391 info.max_sq_frag_cnt = attr->cap.max_send_sge; 1392 info.max_rq_frag_cnt = attr->cap.max_recv_sge; 1393 info.max_inline_data = attr->cap.max_inline_data; 1394 info.abi_ver = iwvctx->abi_ver; 1395 1396 status = irdma_uk_calc_depth_shift_sq(&info, &info.sq_depth, &info.sq_shift); 1397 if (status) { 1398 printf("%s: invalid SQ attributes, max_send_wr=%d max_send_sge=%d max_inline=%d\n", 1399 __func__, attr->cap.max_send_wr, attr->cap.max_send_sge, 1400 attr->cap.max_inline_data); 1401 errno = status; 1402 return NULL; 1403 } 1404 1405 status = irdma_uk_calc_depth_shift_rq(&info, &info.rq_depth, &info.rq_shift); 1406 if (status) { 1407 printf("%s: invalid RQ attributes, recv_wr=%d recv_sge=%d\n", 1408 __func__, attr->cap.max_recv_wr, attr->cap.max_recv_sge); 1409 errno = status; 1410 return NULL; 1411 } 1412 1413 iwuqp = memalign(1024, sizeof(*iwuqp)); 1414 if (!iwuqp) 1415 return NULL; 1416 1417 memset(iwuqp, 0, sizeof(*iwuqp)); 1418 1419 status = pthread_spin_init(&iwuqp->lock, PTHREAD_PROCESS_PRIVATE); 1420 if (status) 1421 goto err_free_qp; 1422 1423 info.sq_size = info.sq_depth >> info.sq_shift; 1424 info.rq_size = info.rq_depth >> info.rq_shift; 1425 /** 1426 * Maintain backward compatibility with older ABI which pass sq 1427 * and rq depth (in quanta) in cap.max_send_wr a cap.max_recv_wr 1428 */ 1429 if (!iwvctx->use_raw_attrs) { 1430 attr->cap.max_send_wr = info.sq_size; 1431 attr->cap.max_recv_wr = info.rq_size; 1432 } 1433 1434 iwuqp->recv_sges = calloc(attr->cap.max_recv_sge, 
sizeof(*iwuqp->recv_sges)); 1435 if (!iwuqp->recv_sges) { 1436 status = errno; /* preserve errno */ 1437 goto err_destroy_lock; 1438 } 1439 1440 info.wqe_alloc_db = (u32 *)iwvctx->db; 1441 info.legacy_mode = iwvctx->legacy_mode; 1442 info.sq_wrtrk_array = calloc(info.sq_depth, sizeof(*info.sq_wrtrk_array)); 1443 if (!info.sq_wrtrk_array) { 1444 status = errno; /* preserve errno */ 1445 goto err_free_rsges; 1446 } 1447 1448 info.rq_wrid_array = calloc(info.rq_depth, sizeof(*info.rq_wrid_array)); 1449 if (!info.rq_wrid_array) { 1450 status = errno; /* preserve errno */ 1451 goto err_free_sq_wrtrk; 1452 } 1453 1454 iwuqp->sq_sig_all = attr->sq_sig_all; 1455 iwuqp->qp_type = attr->qp_type; 1456 status = irdma_vmapped_qp(iwuqp, pd, attr, &info, iwvctx->legacy_mode); 1457 if (status) 1458 goto err_free_rq_wrid; 1459 1460 iwuqp->qp.back_qp = iwuqp; 1461 iwuqp->qp.lock = &iwuqp->lock; 1462 1463 status = irdma_uk_qp_init(&iwuqp->qp, &info); 1464 if (status) 1465 goto err_free_vmap_qp; 1466 1467 attr->cap.max_send_wr = (info.sq_depth - IRDMA_SQ_RSVD) >> info.sq_shift; 1468 attr->cap.max_recv_wr = (info.rq_depth - IRDMA_RQ_RSVD) >> info.rq_shift; 1469 1470 return &iwuqp->ibv_qp; 1471 1472 err_free_vmap_qp: 1473 irdma_destroy_vmapped_qp(iwuqp); 1474 irdma_free_hw_buf(info.sq, iwuqp->buf_size); 1475 err_free_rq_wrid: 1476 free(info.rq_wrid_array); 1477 err_free_sq_wrtrk: 1478 free(info.sq_wrtrk_array); 1479 err_free_rsges: 1480 free(iwuqp->recv_sges); 1481 err_destroy_lock: 1482 pthread_spin_destroy(&iwuqp->lock); 1483 err_free_qp: 1484 printf("%s: failed to create QP\n", __func__); 1485 free(iwuqp); 1486 1487 errno = status; 1488 return NULL; 1489 } 1490 1491 /** 1492 * irdma_uquery_qp - query qp for some attribute 1493 * @qp: qp for the attributes query 1494 * @attr: to return the attributes 1495 * @attr_mask: mask of what is query for 1496 * @init_attr: initial attributes during create_qp 1497 */ 1498 int 1499 irdma_uquery_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask, 1500 struct ibv_qp_init_attr *init_attr) 1501 { 1502 struct ibv_query_qp cmd; 1503 1504 return ibv_cmd_query_qp(qp, attr, attr_mask, init_attr, &cmd, 1505 sizeof(cmd)); 1506 } 1507 1508 /** 1509 * irdma_umodify_qp - send qp modify to driver 1510 * @qp: qp to modify 1511 * @attr: attribute to modify 1512 * @attr_mask: mask of the attribute 1513 */ 1514 int 1515 irdma_umodify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) 1516 { 1517 struct irdma_umodify_qp_resp resp = {}; 1518 struct ibv_modify_qp cmd = {}; 1519 struct irdma_modify_qp_cmd cmd_ex = {}; 1520 struct irdma_uvcontext *iwvctx; 1521 struct irdma_uqp *iwuqp; 1522 1523 iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); 1524 iwvctx = container_of(qp->context, struct irdma_uvcontext, ibv_ctx); 1525 1526 if (iwuqp->qp.qp_caps & IRDMA_PUSH_MODE && attr_mask & IBV_QP_STATE && 1527 iwvctx->uk_attrs.hw_rev > IRDMA_GEN_1) { 1528 u64 offset; 1529 void *map; 1530 int ret; 1531 1532 ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex.ibv_cmd, 1533 sizeof(cmd_ex.ibv_cmd), 1534 sizeof(cmd_ex), &resp.ibv_resp, 1535 sizeof(resp.ibv_resp), 1536 sizeof(resp)); 1537 if (!ret) 1538 iwuqp->qp.rd_fence_rate = resp.rd_fence_rate; 1539 if (ret || !resp.push_valid) 1540 return ret; 1541 1542 if (iwuqp->qp.push_wqe) 1543 return ret; 1544 1545 offset = resp.push_wqe_mmap_key; 1546 map = irdma_mmap(qp->context->cmd_fd, offset); 1547 if (map == MAP_FAILED) 1548 return ret; 1549 1550 iwuqp->qp.push_wqe = map; 1551 1552 offset = resp.push_db_mmap_key; 1553 map = 
irdma_mmap(qp->context->cmd_fd, offset); 1554 if (map == MAP_FAILED) { 1555 irdma_munmap(iwuqp->qp.push_wqe); 1556 iwuqp->qp.push_wqe = NULL; 1557 printf("failed to map push page, errno %d\n", errno); 1558 return ret; 1559 } 1560 iwuqp->qp.push_wqe += resp.push_offset; 1561 iwuqp->qp.push_db = map + resp.push_offset; 1562 1563 return ret; 1564 } else { 1565 return ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); 1566 } 1567 } 1568 1569 static void 1570 irdma_issue_flush(struct ibv_qp *qp, bool sq_flush, bool rq_flush) 1571 { 1572 struct irdma_umodify_qp_resp resp = {}; 1573 struct irdma_modify_qp_cmd cmd_ex = {}; 1574 struct ibv_qp_attr attr = {}; 1575 1576 attr.qp_state = IBV_QPS_ERR; 1577 cmd_ex.sq_flush = sq_flush; 1578 cmd_ex.rq_flush = rq_flush; 1579 1580 ibv_cmd_modify_qp_ex(qp, &attr, IBV_QP_STATE, 1581 &cmd_ex.ibv_cmd, 1582 sizeof(cmd_ex.ibv_cmd), 1583 sizeof(cmd_ex), &resp.ibv_resp, 1584 sizeof(resp.ibv_resp), 1585 sizeof(resp)); 1586 } 1587 1588 /** 1589 * irdma_clean_cqes - clean cq entries for qp 1590 * @qp: qp for which completions are cleaned 1591 * @iwcq: cq to be cleaned 1592 */ 1593 static void 1594 irdma_clean_cqes(struct irdma_qp_uk *qp, struct irdma_ucq *iwucq) 1595 { 1596 struct irdma_cq_uk *ukcq = &iwucq->cq; 1597 int ret; 1598 1599 ret = pthread_spin_lock(&iwucq->lock); 1600 if (ret) 1601 return; 1602 1603 irdma_uk_clean_cq(qp, ukcq); 1604 pthread_spin_unlock(&iwucq->lock); 1605 } 1606 1607 /** 1608 * irdma_udestroy_qp - destroy qp 1609 * @qp: qp to destroy 1610 */ 1611 int 1612 irdma_udestroy_qp(struct ibv_qp *qp) 1613 { 1614 struct irdma_uqp *iwuqp; 1615 int ret; 1616 1617 iwuqp = container_of(qp, struct irdma_uqp, ibv_qp); 1618 ret = pthread_spin_destroy(&iwuqp->lock); 1619 if (ret) 1620 goto err; 1621 1622 ret = irdma_destroy_vmapped_qp(iwuqp); 1623 if (ret) 1624 goto err; 1625 1626 /* Clean any pending completions from the cq(s) */ 1627 if (iwuqp->send_cq) 1628 irdma_clean_cqes(&iwuqp->qp, iwuqp->send_cq); 1629 1630 if (iwuqp->recv_cq && iwuqp->recv_cq != iwuqp->send_cq) 1631 irdma_clean_cqes(&iwuqp->qp, iwuqp->recv_cq); 1632 1633 if (iwuqp->qp.sq_wrtrk_array) 1634 free(iwuqp->qp.sq_wrtrk_array); 1635 if (iwuqp->qp.rq_wrid_array) 1636 free(iwuqp->qp.rq_wrid_array); 1637 1638 irdma_free_hw_buf(iwuqp->qp.sq_base, iwuqp->buf_size); 1639 free(iwuqp->recv_sges); 1640 free(iwuqp); 1641 return 0; 1642 1643 err: 1644 printf("%s: failed to destroy QP, status %d\n", 1645 __func__, ret); 1646 return ret; 1647 } 1648 1649 /** 1650 * irdma_copy_sg_list - copy sg list for qp 1651 * @sg_list: copied into sg_list 1652 * @sgl: copy from sgl 1653 * @num_sges: count of sg entries 1654 * @max_sges: count of max supported sg entries 1655 */ 1656 static void 1657 irdma_copy_sg_list(struct irdma_sge *sg_list, struct ibv_sge *sgl, 1658 int num_sges) 1659 { 1660 int i; 1661 1662 for (i = 0; i < num_sges; i++) { 1663 sg_list[i].tag_off = sgl[i].addr; 1664 sg_list[i].len = sgl[i].length; 1665 sg_list[i].stag = sgl[i].lkey; 1666 } 1667 } 1668 1669 /** 1670 * calc_type2_mw_stag - calculate type 2 MW stag 1671 * @rkey: desired rkey of the MW 1672 * @mw_rkey: type2 memory window rkey 1673 * 1674 * compute type2 memory window stag by taking lower 8 bits 1675 * of the desired rkey and leaving 24 bits if mw->rkey unchanged 1676 */ 1677 static inline u32 calc_type2_mw_stag(u32 rkey, u32 mw_rkey) { 1678 const u32 mask = 0xff; 1679 1680 return (rkey & mask) | (mw_rkey & ~mask); 1681 } 1682 1683 /** 1684 * irdma_post_send - post send wr for user application 1685 * @ib_qp: qp to post wr 
1686 * @ib_wr: work request ptr 1687 * @bad_wr: return of bad wr if err 1688 */ 1689 int 1690 irdma_upost_send(struct ibv_qp *ib_qp, struct ibv_send_wr *ib_wr, 1691 struct ibv_send_wr **bad_wr) 1692 { 1693 struct irdma_post_sq_info info; 1694 struct irdma_uvcontext *iwvctx; 1695 struct irdma_uk_attrs *uk_attrs; 1696 struct irdma_uqp *iwuqp; 1697 bool reflush = false; 1698 int err = 0; 1699 1700 iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); 1701 iwvctx = container_of(ib_qp->context, struct irdma_uvcontext, ibv_ctx); 1702 uk_attrs = &iwvctx->uk_attrs; 1703 1704 err = pthread_spin_lock(&iwuqp->lock); 1705 if (err) 1706 return err; 1707 1708 if (!IRDMA_RING_MORE_WORK(iwuqp->qp.sq_ring) && 1709 ib_qp->state == IBV_QPS_ERR) 1710 reflush = true; 1711 1712 while (ib_wr) { 1713 memset(&info, 0, sizeof(info)); 1714 info.wr_id = (u64)(ib_wr->wr_id); 1715 if ((ib_wr->send_flags & IBV_SEND_SIGNALED) || 1716 iwuqp->sq_sig_all) 1717 info.signaled = true; 1718 if (ib_wr->send_flags & IBV_SEND_FENCE) 1719 info.read_fence = true; 1720 1721 switch (ib_wr->opcode) { 1722 case IBV_WR_SEND_WITH_IMM: 1723 if (iwuqp->qp.qp_caps & IRDMA_SEND_WITH_IMM) { 1724 info.imm_data_valid = true; 1725 info.imm_data = ntohl(ib_wr->imm_data); 1726 } else { 1727 err = EINVAL; 1728 break; 1729 } 1730 /* fallthrough */ 1731 case IBV_WR_SEND: 1732 case IBV_WR_SEND_WITH_INV: 1733 if (ib_wr->opcode == IBV_WR_SEND || 1734 ib_wr->opcode == IBV_WR_SEND_WITH_IMM) { 1735 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1736 info.op_type = IRDMA_OP_TYPE_SEND_SOL; 1737 else 1738 info.op_type = IRDMA_OP_TYPE_SEND; 1739 } else { 1740 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1741 info.op_type = IRDMA_OP_TYPE_SEND_SOL_INV; 1742 else 1743 info.op_type = IRDMA_OP_TYPE_SEND_INV; 1744 info.stag_to_inv = ib_wr->imm_data; 1745 } 1746 info.op.send.num_sges = ib_wr->num_sge; 1747 info.op.send.sg_list = (struct irdma_sge *)ib_wr->sg_list; 1748 if (ib_qp->qp_type == IBV_QPT_UD) { 1749 struct irdma_uah *ah = container_of(ib_wr->wr.ud.ah, 1750 struct irdma_uah, ibv_ah); 1751 1752 info.op.send.ah_id = ah->ah_id; 1753 info.op.send.qkey = ib_wr->wr.ud.remote_qkey; 1754 info.op.send.dest_qp = ib_wr->wr.ud.remote_qpn; 1755 } 1756 1757 if (ib_wr->send_flags & IBV_SEND_INLINE) 1758 err = irdma_uk_inline_send(&iwuqp->qp, &info, false); 1759 else 1760 err = irdma_uk_send(&iwuqp->qp, &info, false); 1761 break; 1762 case IBV_WR_RDMA_WRITE_WITH_IMM: 1763 if (iwuqp->qp.qp_caps & IRDMA_WRITE_WITH_IMM) { 1764 info.imm_data_valid = true; 1765 info.imm_data = ntohl(ib_wr->imm_data); 1766 } else { 1767 err = EINVAL; 1768 break; 1769 } 1770 /* fallthrough */ 1771 case IBV_WR_RDMA_WRITE: 1772 if (ib_wr->send_flags & IBV_SEND_SOLICITED) 1773 info.op_type = IRDMA_OP_TYPE_RDMA_WRITE_SOL; 1774 else 1775 info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; 1776 1777 info.op.rdma_write.num_lo_sges = ib_wr->num_sge; 1778 info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; 1779 info.op.rdma_write.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr; 1780 info.op.rdma_write.rem_addr.stag = ib_wr->wr.rdma.rkey; 1781 if (ib_wr->send_flags & IBV_SEND_INLINE) 1782 err = irdma_uk_inline_rdma_write(&iwuqp->qp, &info, false); 1783 else 1784 err = irdma_uk_rdma_write(&iwuqp->qp, &info, false); 1785 break; 1786 case IBV_WR_RDMA_READ: 1787 if (ib_wr->num_sge > uk_attrs->max_hw_read_sges) { 1788 err = EINVAL; 1789 break; 1790 } 1791 info.op_type = IRDMA_OP_TYPE_RDMA_READ; 1792 info.op.rdma_read.rem_addr.tag_off = ib_wr->wr.rdma.remote_addr; 1793 info.op.rdma_read.rem_addr.stag = ib_wr->wr.rdma.rkey; 
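			/* local SG list describes the buffers that receive the RDMA read data */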
1794 1795 info.op.rdma_read.lo_sg_list = (void *)ib_wr->sg_list; 1796 info.op.rdma_read.num_lo_sges = ib_wr->num_sge; 1797 err = irdma_uk_rdma_read(&iwuqp->qp, &info, false, false); 1798 break; 1799 case IBV_WR_BIND_MW: 1800 if (ib_qp->qp_type != IBV_QPT_RC) { 1801 err = EINVAL; 1802 break; 1803 } 1804 info.op_type = IRDMA_OP_TYPE_BIND_MW; 1805 info.op.bind_window.mr_stag = ib_wr->bind_mw.bind_info.mr->rkey; 1806 if (ib_wr->bind_mw.mw->type == IBV_MW_TYPE_1) { 1807 info.op.bind_window.mem_window_type_1 = true; 1808 info.op.bind_window.mw_stag = ib_wr->bind_mw.rkey; 1809 } else { 1810 struct verbs_mr *vmr = verbs_get_mr(ib_wr->bind_mw.bind_info.mr); 1811 1812 if (vmr->access & IBV_ACCESS_ZERO_BASED) { 1813 err = EINVAL; 1814 break; 1815 } 1816 info.op.bind_window.mw_stag = 1817 calc_type2_mw_stag(ib_wr->bind_mw.rkey, ib_wr->bind_mw.mw->rkey); 1818 ib_wr->bind_mw.mw->rkey = info.op.bind_window.mw_stag; 1819 1820 } 1821 1822 if (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_ZERO_BASED) { 1823 info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_ZERO_BASED; 1824 info.op.bind_window.va = NULL; 1825 } else { 1826 info.op.bind_window.addressing_type = IRDMA_ADDR_TYPE_VA_BASED; 1827 info.op.bind_window.va = (void *)(uintptr_t)ib_wr->bind_mw.bind_info.addr; 1828 } 1829 info.op.bind_window.bind_len = ib_wr->bind_mw.bind_info.length; 1830 info.op.bind_window.ena_reads = 1831 (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_READ) ? 1 : 0; 1832 info.op.bind_window.ena_writes = 1833 (ib_wr->bind_mw.bind_info.mw_access_flags & IBV_ACCESS_REMOTE_WRITE) ? 1 : 0; 1834 1835 err = irdma_uk_mw_bind(&iwuqp->qp, &info, false); 1836 break; 1837 case IBV_WR_LOCAL_INV: 1838 info.op_type = IRDMA_OP_TYPE_INV_STAG; 1839 info.op.inv_local_stag.target_stag = ib_wr->imm_data; 1840 err = irdma_uk_stag_local_invalidate(&iwuqp->qp, &info, true); 1841 break; 1842 default: 1843 /* error */ 1844 err = EINVAL; 1845 printf("%s: post work request failed, invalid opcode: 0x%x\n", 1846 __func__, ib_wr->opcode); 1847 break; 1848 } 1849 if (err) 1850 break; 1851 1852 ib_wr = ib_wr->next; 1853 } 1854 1855 if (err) 1856 *bad_wr = ib_wr; 1857 1858 irdma_uk_qp_post_wr(&iwuqp->qp); 1859 if (reflush) 1860 irdma_issue_flush(ib_qp, 1, 0); 1861 1862 pthread_spin_unlock(&iwuqp->lock); 1863 1864 return err; 1865 } 1866 1867 /** 1868 * irdma_post_recv - post receive wr for user application 1869 * @ib_wr: work request for receive 1870 * @bad_wr: bad wr caused an error 1871 */ 1872 int 1873 irdma_upost_recv(struct ibv_qp *ib_qp, struct ibv_recv_wr *ib_wr, 1874 struct ibv_recv_wr **bad_wr) 1875 { 1876 struct irdma_post_rq_info post_recv = {}; 1877 struct irdma_sge *sg_list; 1878 struct irdma_uqp *iwuqp; 1879 bool reflush = false; 1880 int err = 0; 1881 1882 iwuqp = container_of(ib_qp, struct irdma_uqp, ibv_qp); 1883 sg_list = iwuqp->recv_sges; 1884 1885 err = pthread_spin_lock(&iwuqp->lock); 1886 if (err) 1887 return err; 1888 1889 if (!IRDMA_RING_MORE_WORK(iwuqp->qp.rq_ring) && 1890 ib_qp->state == IBV_QPS_ERR) 1891 reflush = true; 1892 1893 while (ib_wr) { 1894 if (ib_wr->num_sge > iwuqp->qp.max_rq_frag_cnt) { 1895 *bad_wr = ib_wr; 1896 err = EINVAL; 1897 goto error; 1898 } 1899 post_recv.num_sges = ib_wr->num_sge; 1900 post_recv.wr_id = ib_wr->wr_id; 1901 irdma_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge); 1902 post_recv.sg_list = sg_list; 1903 err = irdma_uk_post_receive(&iwuqp->qp, &post_recv); 1904 if (err) { 1905 *bad_wr = ib_wr; 1906 goto error; 1907 } 1908 1909 if (reflush) 1910 irdma_issue_flush(ib_qp, 0, 
1); 1911 1912 ib_wr = ib_wr->next; 1913 } 1914 error: 1915 pthread_spin_unlock(&iwuqp->lock); 1916 1917 return err; 1918 } 1919 1920 /** 1921 * irdma_ucreate_ah - create address handle associated with a pd 1922 * @ibpd: pd for the address handle 1923 * @attr: attributes of address handle 1924 */ 1925 struct ibv_ah * 1926 irdma_ucreate_ah(struct ibv_pd *ibpd, struct ibv_ah_attr *attr) 1927 { 1928 struct irdma_uah *ah; 1929 union ibv_gid sgid; 1930 struct irdma_ucreate_ah_resp resp = {}; 1931 int err; 1932 1933 if (ibv_query_gid(ibpd->context, attr->port_num, attr->grh.sgid_index, 1934 &sgid)) { 1935 fprintf(stderr, "irdma: Error from ibv_query_gid.\n"); 1936 errno = ENOENT; 1937 return NULL; 1938 } 1939 1940 ah = calloc(1, sizeof(*ah)); 1941 if (!ah) 1942 return NULL; 1943 1944 err = ibv_cmd_create_ah(ibpd, &ah->ibv_ah, attr, &resp.ibv_resp, 1945 sizeof(resp)); 1946 if (err) { 1947 free(ah); 1948 errno = err; 1949 return NULL; 1950 } 1951 1952 ah->ah_id = resp.ah_id; 1953 1954 return &ah->ibv_ah; 1955 } 1956 1957 /** 1958 * irdma_udestroy_ah - destroy the address handle 1959 * @ibah: address handle 1960 */ 1961 int 1962 irdma_udestroy_ah(struct ibv_ah *ibah) 1963 { 1964 struct irdma_uah *ah; 1965 int ret; 1966 1967 ah = container_of(ibah, struct irdma_uah, ibv_ah); 1968 1969 ret = ibv_cmd_destroy_ah(ibah); 1970 if (ret) 1971 return ret; 1972 1973 free(ah); 1974 1975 return 0; 1976 } 1977 1978 /** 1979 * irdma_uattach_mcast - Attach qp to multicast group implemented 1980 * @qp: The queue pair 1981 * @gid:The Global ID for multicast group 1982 * @lid: The Local ID 1983 */ 1984 int 1985 irdma_uattach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, 1986 uint16_t lid) 1987 { 1988 return ibv_cmd_attach_mcast(qp, gid, lid); 1989 } 1990 1991 /** 1992 * irdma_udetach_mcast - Detach qp from multicast group 1993 * @qp: The queue pair 1994 * @gid:The Global ID for multicast group 1995 * @lid: The Local ID 1996 */ 1997 int 1998 irdma_udetach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, 1999 uint16_t lid) 2000 { 2001 return ibv_cmd_detach_mcast(qp, gid, lid); 2002 } 2003 2004 /** 2005 * irdma_uresize_cq - resizes a cq 2006 * @cq: cq to resize 2007 * @cqe: the number of cqes of the new cq 2008 */ 2009 int 2010 irdma_uresize_cq(struct ibv_cq *cq, int cqe) 2011 { 2012 struct irdma_uvcontext *iwvctx; 2013 struct irdma_uk_attrs *uk_attrs; 2014 struct irdma_uresize_cq cmd = {}; 2015 struct ibv_resize_cq_resp resp = {}; 2016 struct irdma_ureg_mr reg_mr_cmd = {}; 2017 struct ibv_reg_mr_resp reg_mr_resp = {}; 2018 struct irdma_cq_buf *cq_buf = NULL; 2019 struct irdma_cqe *cq_base = NULL; 2020 struct verbs_mr new_mr = {}; 2021 struct irdma_ucq *iwucq; 2022 size_t cq_size; 2023 u32 cq_pages; 2024 int cqe_needed; 2025 int ret = 0; 2026 bool cqe_64byte_ena; 2027 2028 iwucq = container_of(cq, struct irdma_ucq, verbs_cq.cq); 2029 iwvctx = container_of(cq->context, struct irdma_uvcontext, ibv_ctx); 2030 uk_attrs = &iwvctx->uk_attrs; 2031 2032 if (!(uk_attrs->feature_flags & IRDMA_FEATURE_CQ_RESIZE)) 2033 return EOPNOTSUPP; 2034 2035 if (cqe < uk_attrs->min_hw_cq_size || cqe > uk_attrs->max_hw_cq_size - 1) 2036 return EINVAL; 2037 2038 cqe_64byte_ena = uk_attrs->feature_flags & IRDMA_FEATURE_64_BYTE_CQE ? 
true : false;

	cqe_needed = get_cq_size(cqe, uk_attrs->hw_rev, cqe_64byte_ena);

	if (cqe_needed == iwucq->cq.cq_size)
		return 0;

	cq_size = get_cq_total_bytes(cqe_needed, cqe_64byte_ena);
	cq_pages = cq_size >> IRDMA_HW_PAGE_SHIFT;
	cq_base = irdma_alloc_hw_buf(cq_size);
	if (!cq_base)
		return ENOMEM;

	memset(cq_base, 0, cq_size);

	cq_buf = malloc(sizeof(*cq_buf));
	if (!cq_buf) {
		ret = ENOMEM;
		goto err_buf;
	}

	new_mr.ibv_mr.pd = iwucq->vmr.ibv_mr.pd;
	reg_mr_cmd.reg_type = IRDMA_MEMREG_TYPE_CQ;
	reg_mr_cmd.cq_pages = cq_pages;

	ret = ibv_cmd_reg_mr(new_mr.ibv_mr.pd, cq_base, cq_size,
			     (uintptr_t)cq_base, IBV_ACCESS_LOCAL_WRITE,
			     &new_mr.ibv_mr, &reg_mr_cmd.ibv_cmd, sizeof(reg_mr_cmd),
			     &reg_mr_resp, sizeof(reg_mr_resp));
	if (ret)
		goto err_dereg_mr;

	ret = pthread_spin_lock(&iwucq->lock);
	if (ret)
		goto err_lock;

	cmd.user_cq_buffer = (__u64) ((uintptr_t)cq_base);
	ret = ibv_cmd_resize_cq(&iwucq->verbs_cq.cq, cqe_needed, &cmd.ibv_cmd,
				sizeof(cmd), &resp, sizeof(resp));
	if (ret)
		goto err_resize;

	memcpy(&cq_buf->cq, &iwucq->cq, sizeof(cq_buf->cq));
	cq_buf->buf_size = cq_size;
	cq_buf->vmr = iwucq->vmr;
	iwucq->vmr = new_mr;
	irdma_uk_cq_resize(&iwucq->cq, cq_base, cqe_needed);
	iwucq->verbs_cq.cq.cqe = cqe;
	LIST_INSERT_HEAD(&iwucq->resize_list, cq_buf, list);

	pthread_spin_unlock(&iwucq->lock);

	return ret;

err_resize:
	pthread_spin_unlock(&iwucq->lock);
err_lock:
	ibv_cmd_dereg_mr(&new_mr.ibv_mr);
err_dereg_mr:
	free(cq_buf);
err_buf:
	fprintf(stderr, "failed to resize CQ cq_id=%d ret=%d\n", iwucq->cq.cq_id, ret);
	irdma_free_hw_buf(cq_base, cq_size);
	return ret;
}
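
/*
 * Illustrative usage sketch, compiled out and not part of the provider:
 * it shows how an application would reach the irdma_u* verbs defined in
 * this file through the standard libibverbs entry points. The function
 * name example_poll_once, the 4 KB buffer and the CQ depth of 64 are
 * arbitrary values chosen for the example; error handling is minimal.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <infiniband/verbs.h>

static int
example_poll_once(void)
{
	struct ibv_device **dev_list;
	struct ibv_context *ctx = NULL;
	struct ibv_pd *pd = NULL;
	struct ibv_mr *mr = NULL;
	struct ibv_cq *cq = NULL;
	struct ibv_wc wc;
	void *buf = NULL;
	int n = -1;

	dev_list = ibv_get_device_list(NULL);
	if (!dev_list || !dev_list[0])
		goto out;

	ctx = ibv_open_device(dev_list[0]);	/* binds the irdma user verbs */
	if (!ctx)
		goto out;

	pd = ibv_alloc_pd(ctx);			/* -> irdma_ualloc_pd */
	if (!pd)
		goto out;

	buf = calloc(1, 4096);
	if (!buf)
		goto out;

	mr = ibv_reg_mr(pd, buf, 4096, IBV_ACCESS_LOCAL_WRITE); /* -> irdma_ureg_mr */
	if (!mr)
		goto out;

	cq = ibv_create_cq(ctx, 64, NULL, NULL, 0);	/* -> irdma_ucreate_cq */
	if (!cq)
		goto out;

	n = ibv_poll_cq(cq, 1, &wc);		/* -> irdma_upoll_cq, 0 when empty */
	printf("polled %d completion(s)\n", n);

out:
	if (cq)
		ibv_destroy_cq(cq);		/* -> irdma_udestroy_cq */
	if (mr)
		ibv_dereg_mr(mr);		/* -> irdma_udereg_mr */
	if (pd)
		ibv_dealloc_pd(pd);		/* -> irdma_ufree_pd */
	free(buf);
	if (ctx)
		ibv_close_device(ctx);
	if (dev_list)
		ibv_free_device_list(dev_list);
	return n < 0 ? 1 : 0;
}
#endif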