/*
 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#include "mlx5.h"
#include "mlx5-abi.h"
#include "wqe.h"

int mlx5_single_threaded = 0;

static inline int is_xrc_tgt(int type)
{
	return type == IBV_QPT_XRC_RECV;
}

int mlx5_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t raw_fw_ver;
	unsigned major, minor, sub_minor;
	int ret;

	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
	if (ret)
		return ret;

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->fw_ver, sizeof attr->fw_ver,
		 "%d.%d.%04d", major, minor, sub_minor);

	return 0;
}

#define READL(ptr) (*((uint32_t *)(ptr)))
static int mlx5_read_clock(struct ibv_context *context, uint64_t *cycles)
{
	unsigned int clockhi, clocklo, clockhi1;
	int i;
	struct mlx5_context *ctx = to_mctx(context);

	if (!ctx->hca_core_clock)
		return -EOPNOTSUPP;

	/* Handle wraparound */
	for (i = 0; i < 2; i++) {
		clockhi = be32toh(READL(ctx->hca_core_clock));
		clocklo = be32toh(READL(ctx->hca_core_clock + 4));
		clockhi1 = be32toh(READL(ctx->hca_core_clock));
		if (clockhi == clockhi1)
			break;
	}

	*cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;

	return 0;
}

int mlx5_query_rt_values(struct ibv_context *context,
			 struct ibv_values_ex *values)
{
	uint32_t comp_mask = 0;
	int err = 0;

	if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
		uint64_t cycles;

		err = mlx5_read_clock(context, &cycles);
		if (!err) {
			values->raw_clock.tv_sec = 0;
			values->raw_clock.tv_nsec = cycles;
			comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
		}
	}

	values->comp_mask = comp_mask;

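	/*
	 * Only the values that were actually sampled are reported back in
	 * values->comp_mask; if reading the HCA core clock fails, the
	 * raw-clock bit is left clear and the error is returned to the
	 * caller.
	 */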
return err; 123 } 124 125 int mlx5_query_port(struct ibv_context *context, uint8_t port, 126 struct ibv_port_attr *attr) 127 { 128 struct ibv_query_port cmd; 129 130 return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd); 131 } 132 133 struct ibv_pd *mlx5_alloc_pd(struct ibv_context *context) 134 { 135 struct ibv_alloc_pd cmd; 136 struct mlx5_alloc_pd_resp resp; 137 struct mlx5_pd *pd; 138 139 pd = calloc(1, sizeof *pd); 140 if (!pd) 141 return NULL; 142 143 if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd, 144 &resp.ibv_resp, sizeof resp)) { 145 free(pd); 146 return NULL; 147 } 148 149 pd->pdn = resp.pdn; 150 151 return &pd->ibv_pd; 152 } 153 154 int mlx5_free_pd(struct ibv_pd *pd) 155 { 156 int ret; 157 158 ret = ibv_cmd_dealloc_pd(pd); 159 if (ret) 160 return ret; 161 162 free(to_mpd(pd)); 163 return 0; 164 } 165 166 struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length, 167 int acc) 168 { 169 struct mlx5_mr *mr; 170 struct ibv_reg_mr cmd; 171 int ret; 172 enum ibv_access_flags access = (enum ibv_access_flags)acc; 173 struct ibv_reg_mr_resp resp; 174 175 mr = calloc(1, sizeof(*mr)); 176 if (!mr) 177 return NULL; 178 179 ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, 180 &(mr->ibv_mr), &cmd, sizeof(cmd), &resp, 181 sizeof resp); 182 if (ret) { 183 mlx5_free_buf(&(mr->buf)); 184 free(mr); 185 return NULL; 186 } 187 mr->alloc_flags = acc; 188 189 return &mr->ibv_mr; 190 } 191 192 int mlx5_rereg_mr(struct ibv_mr *ibmr, int flags, struct ibv_pd *pd, void *addr, 193 size_t length, int access) 194 { 195 struct ibv_rereg_mr cmd; 196 struct ibv_rereg_mr_resp resp; 197 198 if (flags & IBV_REREG_MR_KEEP_VALID) 199 return ENOTSUP; 200 201 return ibv_cmd_rereg_mr(ibmr, flags, addr, length, (uintptr_t)addr, 202 access, pd, &cmd, sizeof(cmd), &resp, 203 sizeof(resp)); 204 } 205 206 int mlx5_dereg_mr(struct ibv_mr *ibmr) 207 { 208 int ret; 209 struct mlx5_mr *mr = to_mmr(ibmr); 210 211 ret = ibv_cmd_dereg_mr(ibmr); 212 if (ret) 213 return ret; 214 215 free(mr); 216 return 0; 217 } 218 219 struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) 220 { 221 struct ibv_mw *mw; 222 struct ibv_alloc_mw cmd; 223 struct ibv_alloc_mw_resp resp; 224 int ret; 225 226 mw = malloc(sizeof(*mw)); 227 if (!mw) 228 return NULL; 229 230 memset(mw, 0, sizeof(*mw)); 231 232 ret = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, 233 sizeof(resp)); 234 if (ret) { 235 free(mw); 236 return NULL; 237 } 238 239 return mw; 240 } 241 242 int mlx5_dealloc_mw(struct ibv_mw *mw) 243 { 244 int ret; 245 struct ibv_dealloc_mw cmd; 246 247 ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd)); 248 if (ret) 249 return ret; 250 251 free(mw); 252 return 0; 253 } 254 255 int mlx5_round_up_power_of_two(long long sz) 256 { 257 long long ret; 258 259 for (ret = 1; ret < sz; ret <<= 1) 260 ; /* nothing */ 261 262 if (ret > INT_MAX) { 263 fprintf(stderr, "%s: roundup overflow\n", __func__); 264 return -ENOMEM; 265 } 266 267 return (int)ret; 268 } 269 270 static int align_queue_size(long long req) 271 { 272 return mlx5_round_up_power_of_two(req); 273 } 274 275 static int get_cqe_size(void) 276 { 277 char *env; 278 int size = 64; 279 280 env = getenv("MLX5_CQE_SIZE"); 281 if (env) 282 size = atoi(env); 283 284 switch (size) { 285 case 64: 286 case 128: 287 return size; 288 289 default: 290 return -EINVAL; 291 } 292 } 293 294 static int use_scatter_to_cqe(void) 295 { 296 char *env; 297 298 env = getenv("MLX5_SCATTER_TO_CQE"); 299 if (env && !strcmp(env, "0")) 300 return 0; 301 
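	/*
	 * Scatter-to-CQE is on by default; it is disabled only when the
	 * MLX5_SCATTER_TO_CQE environment variable is explicitly set to "0"
	 * (for example, launching an application with MLX5_SCATTER_TO_CQE=0
	 * in its environment forces regular scatter).
	 */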
302 return 1; 303 } 304 305 static int srq_sig_enabled(void) 306 { 307 char *env; 308 309 env = getenv("MLX5_SRQ_SIGNATURE"); 310 if (env) 311 return 1; 312 313 return 0; 314 } 315 316 static int qp_sig_enabled(void) 317 { 318 char *env; 319 320 env = getenv("MLX5_QP_SIGNATURE"); 321 if (env) 322 return 1; 323 324 return 0; 325 } 326 327 enum { 328 CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | 329 IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | 330 IBV_WC_EX_WITH_CVLAN | 331 IBV_WC_EX_WITH_FLOW_TAG 332 }; 333 334 enum { 335 CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS 336 }; 337 338 enum { 339 CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED 340 }; 341 342 static struct ibv_cq_ex *create_cq(struct ibv_context *context, 343 const struct ibv_cq_init_attr_ex *cq_attr, 344 int cq_alloc_flags, 345 struct mlx5dv_cq_init_attr *mlx5cq_attr) 346 { 347 struct mlx5_create_cq cmd; 348 struct mlx5_create_cq_resp resp; 349 struct mlx5_cq *cq; 350 int cqe_sz; 351 int ret; 352 int ncqe; 353 struct mlx5_context *mctx = to_mctx(context); 354 FILE *fp = to_mctx(context)->dbg_fp; 355 356 if (!cq_attr->cqe) { 357 mlx5_dbg(fp, MLX5_DBG_CQ, "CQE invalid\n"); 358 errno = EINVAL; 359 return NULL; 360 } 361 362 if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) { 363 mlx5_dbg(fp, MLX5_DBG_CQ, 364 "Unsupported comp_mask for create_cq\n"); 365 errno = EINVAL; 366 return NULL; 367 } 368 369 if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS && 370 cq_attr->flags & ~CREATE_CQ_SUPPORTED_FLAGS) { 371 mlx5_dbg(fp, MLX5_DBG_CQ, 372 "Unsupported creation flags requested for create_cq\n"); 373 errno = EINVAL; 374 return NULL; 375 } 376 377 if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { 378 mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); 379 errno = ENOTSUP; 380 return NULL; 381 } 382 383 cq = calloc(1, sizeof *cq); 384 if (!cq) { 385 mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); 386 return NULL; 387 } 388 389 memset(&cmd, 0, sizeof cmd); 390 cq->cons_index = 0; 391 392 if (mlx5_spinlock_init(&cq->lock)) 393 goto err; 394 395 ncqe = align_queue_size(cq_attr->cqe + 1); 396 if ((ncqe > (1 << 24)) || (ncqe < (cq_attr->cqe + 1))) { 397 mlx5_dbg(fp, MLX5_DBG_CQ, "ncqe %d\n", ncqe); 398 errno = EINVAL; 399 goto err_spl; 400 } 401 402 cqe_sz = get_cqe_size(); 403 if (cqe_sz < 0) { 404 mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); 405 errno = -cqe_sz; 406 goto err_spl; 407 } 408 409 if (mlx5_alloc_cq_buf(to_mctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) { 410 mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); 411 goto err_spl; 412 } 413 414 cq->dbrec = mlx5_alloc_dbrec(to_mctx(context)); 415 if (!cq->dbrec) { 416 mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); 417 goto err_buf; 418 } 419 420 cq->dbrec[MLX5_CQ_SET_CI] = 0; 421 cq->dbrec[MLX5_CQ_ARM_DB] = 0; 422 cq->arm_sn = 0; 423 cq->cqe_sz = cqe_sz; 424 cq->flags = cq_alloc_flags; 425 426 if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS && 427 cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED) 428 cq->flags |= MLX5_CQ_FLAGS_SINGLE_THREADED; 429 cmd.buf_addr = (uintptr_t) cq->buf_a.buf; 430 cmd.db_addr = (uintptr_t) cq->dbrec; 431 cmd.cqe_size = cqe_sz; 432 433 if (mlx5cq_attr) { 434 if (mlx5cq_attr->comp_mask & ~(MLX5DV_CQ_INIT_ATTR_MASK_RESERVED - 1)) { 435 mlx5_dbg(fp, MLX5_DBG_CQ, 436 "Unsupported vendor comp_mask for create_cq\n"); 437 errno = EINVAL; 438 goto err_db; 439 } 440 441 if (mlx5cq_attr->comp_mask & MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE) { 442 if (mctx->cqe_comp_caps.max_num && 443 (mlx5cq_attr->cqe_comp_res_format & 444 mctx->cqe_comp_caps.supported_format)) { 445 cmd.cqe_comp_en = 1; 
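				/*
				 * CQE compression is requested from the kernel only when the
				 * device reports support (non-zero max_num) and the caller's
				 * residual format is among the supported formats checked above.
				 */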
446 cmd.cqe_comp_res_format = mlx5cq_attr->cqe_comp_res_format; 447 } else { 448 mlx5_dbg(fp, MLX5_DBG_CQ, "CQE Compression is not supported\n"); 449 errno = EINVAL; 450 goto err_db; 451 } 452 } 453 } 454 455 ret = ibv_cmd_create_cq(context, ncqe - 1, cq_attr->channel, 456 cq_attr->comp_vector, 457 ibv_cq_ex_to_cq(&cq->ibv_cq), &cmd.ibv_cmd, 458 sizeof(cmd), &resp.ibv_resp, sizeof(resp)); 459 if (ret) { 460 mlx5_dbg(fp, MLX5_DBG_CQ, "ret %d\n", ret); 461 goto err_db; 462 } 463 464 cq->active_buf = &cq->buf_a; 465 cq->resize_buf = NULL; 466 cq->cqn = resp.cqn; 467 cq->stall_enable = to_mctx(context)->stall_enable; 468 cq->stall_adaptive_enable = to_mctx(context)->stall_adaptive_enable; 469 cq->stall_cycles = to_mctx(context)->stall_cycles; 470 471 if (cq_alloc_flags & MLX5_CQ_FLAGS_EXTENDED) 472 mlx5_cq_fill_pfns(cq, cq_attr); 473 474 return &cq->ibv_cq; 475 476 err_db: 477 mlx5_free_db(to_mctx(context), cq->dbrec); 478 479 err_buf: 480 mlx5_free_cq_buf(to_mctx(context), &cq->buf_a); 481 482 err_spl: 483 mlx5_spinlock_destroy(&cq->lock); 484 485 err: 486 free(cq); 487 488 return NULL; 489 } 490 491 struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe, 492 struct ibv_comp_channel *channel, 493 int comp_vector) 494 { 495 struct ibv_cq_ex *cq; 496 struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel, 497 .comp_vector = comp_vector, 498 .wc_flags = IBV_WC_STANDARD_FLAGS}; 499 500 if (cqe <= 0) { 501 errno = EINVAL; 502 return NULL; 503 } 504 505 cq = create_cq(context, &cq_attr, 0, NULL); 506 return cq ? ibv_cq_ex_to_cq(cq) : NULL; 507 } 508 509 struct ibv_cq_ex *mlx5_create_cq_ex(struct ibv_context *context, 510 struct ibv_cq_init_attr_ex *cq_attr) 511 { 512 return create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, NULL); 513 } 514 515 struct ibv_cq_ex *mlx5dv_create_cq(struct ibv_context *context, 516 struct ibv_cq_init_attr_ex *cq_attr, 517 struct mlx5dv_cq_init_attr *mlx5_cq_attr) 518 { 519 struct ibv_cq_ex *cq; 520 int err = 0; 521 522 cq = create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, mlx5_cq_attr); 523 if (!cq) 524 return NULL; 525 526 err = verbs_init_cq(ibv_cq_ex_to_cq(cq), context, 527 cq_attr->channel, cq_attr->cq_context); 528 if (err) 529 goto err; 530 531 return cq; 532 533 err: 534 context->ops.destroy_cq(ibv_cq_ex_to_cq(cq)); 535 536 return NULL; 537 } 538 539 int mlx5_resize_cq(struct ibv_cq *ibcq, int cqe) 540 { 541 struct mlx5_cq *cq = to_mcq(ibcq); 542 struct mlx5_resize_cq_resp resp; 543 struct mlx5_resize_cq cmd; 544 struct mlx5_context *mctx = to_mctx(ibcq->context); 545 int err; 546 547 if (cqe < 0) { 548 errno = EINVAL; 549 return errno; 550 } 551 552 memset(&cmd, 0, sizeof(cmd)); 553 memset(&resp, 0, sizeof(resp)); 554 555 if (((long long)cqe * 64) > INT_MAX) 556 return EINVAL; 557 558 mlx5_spin_lock(&cq->lock); 559 cq->active_cqes = cq->ibv_cq.cqe; 560 if (cq->active_buf == &cq->buf_a) 561 cq->resize_buf = &cq->buf_b; 562 else 563 cq->resize_buf = &cq->buf_a; 564 565 cqe = align_queue_size(cqe + 1); 566 if (cqe == ibcq->cqe + 1) { 567 cq->resize_buf = NULL; 568 err = 0; 569 goto out; 570 } 571 572 /* currently we don't change cqe size */ 573 cq->resize_cqe_sz = cq->cqe_sz; 574 cq->resize_cqes = cqe; 575 err = mlx5_alloc_cq_buf(mctx, cq, cq->resize_buf, cq->resize_cqes, cq->resize_cqe_sz); 576 if (err) { 577 cq->resize_buf = NULL; 578 errno = ENOMEM; 579 goto out; 580 } 581 582 cmd.buf_addr = (uintptr_t)cq->resize_buf->buf; 583 cmd.cqe_size = cq->resize_cqe_sz; 584 585 err = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, 
sizeof(cmd), 586 &resp.ibv_resp, sizeof(resp)); 587 if (err) 588 goto out_buf; 589 590 mlx5_cq_resize_copy_cqes(cq); 591 mlx5_free_cq_buf(mctx, cq->active_buf); 592 cq->active_buf = cq->resize_buf; 593 cq->ibv_cq.cqe = cqe - 1; 594 mlx5_spin_unlock(&cq->lock); 595 cq->resize_buf = NULL; 596 return 0; 597 598 out_buf: 599 mlx5_free_cq_buf(mctx, cq->resize_buf); 600 cq->resize_buf = NULL; 601 602 out: 603 mlx5_spin_unlock(&cq->lock); 604 return err; 605 } 606 607 int mlx5_destroy_cq(struct ibv_cq *cq) 608 { 609 int ret; 610 struct mlx5_cq *mcq = to_mcq(cq); 611 612 ret = ibv_cmd_destroy_cq(cq); 613 if (ret) 614 return ret; 615 616 verbs_cleanup_cq(cq); 617 mlx5_free_db(to_mctx(cq->context), to_mcq(cq)->dbrec); 618 mlx5_free_cq_buf(to_mctx(cq->context), to_mcq(cq)->active_buf); 619 mlx5_spinlock_destroy(&mcq->lock); 620 free(to_mcq(cq)); 621 622 return 0; 623 } 624 625 struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd, 626 struct ibv_srq_init_attr *attr) 627 { 628 struct mlx5_create_srq cmd; 629 struct mlx5_create_srq_resp resp; 630 struct mlx5_srq *srq; 631 int ret; 632 struct mlx5_context *ctx; 633 int max_sge; 634 struct ibv_srq *ibsrq; 635 636 ctx = to_mctx(pd->context); 637 srq = calloc(1, sizeof *srq); 638 if (!srq) { 639 fprintf(stderr, "%s-%d:\n", __func__, __LINE__); 640 return NULL; 641 } 642 ibsrq = &srq->vsrq.srq; 643 644 memset(&cmd, 0, sizeof cmd); 645 if (mlx5_spinlock_init(&srq->lock)) { 646 fprintf(stderr, "%s-%d:\n", __func__, __LINE__); 647 goto err; 648 } 649 650 if (attr->attr.max_wr > ctx->max_srq_recv_wr) { 651 fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", __func__, __LINE__, 652 attr->attr.max_wr, ctx->max_srq_recv_wr); 653 errno = EINVAL; 654 goto err_spl; 655 } 656 657 /* 658 * this calculation does not consider required control segments. The 659 * final calculation is done again later. 
This is done to avoid
	 * overflow of variables.
	 */
	max_sge = ctx->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg);
	if (attr->attr.max_sge > max_sge) {
		fprintf(stderr, "%s-%d:max_sge %d, max supported %d\n", __func__, __LINE__,
			attr->attr.max_sge, max_sge);
		errno = EINVAL;
		goto err_spl;
	}

	srq->max = align_queue_size(attr->attr.max_wr + 1);
	srq->max_gs = attr->attr.max_sge;
	srq->counter = 0;

	if (mlx5_alloc_srq_buf(pd->context, srq)) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err_spl;
	}

	srq->db = mlx5_alloc_dbrec(to_mctx(pd->context));
	if (!srq->db) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err_free;
	}

	*srq->db = 0;

	cmd.buf_addr = (uintptr_t) srq->buf.buf;
	cmd.db_addr = (uintptr_t) srq->db;
	srq->wq_sig = srq_sig_enabled();
	if (srq->wq_sig)
		cmd.flags = MLX5_SRQ_FLAG_SIGNATURE;

	attr->attr.max_sge = srq->max_gs;
	pthread_mutex_lock(&ctx->srq_table_mutex);
	ret = ibv_cmd_create_srq(pd, ibsrq, attr, &cmd.ibv_cmd, sizeof(cmd),
				 &resp.ibv_resp, sizeof(resp));
	if (ret)
		goto err_db;

	ret = mlx5_store_srq(ctx, resp.srqn, srq);
	if (ret)
		goto err_destroy;

	pthread_mutex_unlock(&ctx->srq_table_mutex);

	srq->srqn = resp.srqn;
	srq->rsc.rsn = resp.srqn;
	srq->rsc.type = MLX5_RSC_TYPE_SRQ;

	return ibsrq;

err_destroy:
	ibv_cmd_destroy_srq(ibsrq);

err_db:
	pthread_mutex_unlock(&ctx->srq_table_mutex);
	mlx5_free_db(to_mctx(pd->context), srq->db);

err_free:
	free(srq->wrid);
	mlx5_free_buf(&srq->buf);

err_spl:
	mlx5_spinlock_destroy(&srq->lock);

err:
	free(srq);

	return NULL;
}

int mlx5_modify_srq(struct ibv_srq *srq,
		    struct ibv_srq_attr *attr,
		    int attr_mask)
{
	struct ibv_modify_srq cmd;

	return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd);
}

int mlx5_query_srq(struct ibv_srq *srq,
		   struct ibv_srq_attr *attr)
{
	struct ibv_query_srq cmd;

	return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd);
}

int mlx5_destroy_srq(struct ibv_srq *srq)
{
	int ret;
	struct mlx5_srq *msrq = to_msrq(srq);
	struct mlx5_context *ctx = to_mctx(srq->context);

	ret = ibv_cmd_destroy_srq(srq);
	if (ret)
		return ret;

	if (ctx->cqe_version && msrq->rsc.type == MLX5_RSC_TYPE_XSRQ)
		mlx5_clear_uidx(ctx, msrq->rsc.rsn);
	else
		mlx5_clear_srq(ctx, msrq->srqn);

	mlx5_free_db(ctx, msrq->db);
	mlx5_free_buf(&msrq->buf);
	free(msrq->wrid);
	mlx5_spinlock_destroy(&msrq->lock);
	free(msrq);

	return 0;
}

static int sq_overhead(enum ibv_qp_type qp_type)
{
	size_t size = 0;
	size_t mw_bind_size =
		sizeof(struct mlx5_wqe_umr_ctrl_seg) +
		sizeof(struct mlx5_wqe_mkey_context_seg) +
		max_t(size_t, sizeof(struct mlx5_wqe_umr_klm_seg), 64);

	switch (qp_type) {
	case IBV_QPT_RC:
		size += sizeof(struct mlx5_wqe_ctrl_seg) +
			max(sizeof(struct mlx5_wqe_atomic_seg) +
			    sizeof(struct mlx5_wqe_raddr_seg),
			    mw_bind_size);
		break;

	case IBV_QPT_UC:
		size = sizeof(struct mlx5_wqe_ctrl_seg) +
			max(sizeof(struct mlx5_wqe_raddr_seg),
			    mw_bind_size);
		break;

	case IBV_QPT_UD:
		size = sizeof(struct mlx5_wqe_ctrl_seg) +
			sizeof(struct mlx5_wqe_datagram_seg);
		break;

	case IBV_QPT_XRC_SEND:
		size = sizeof(struct
mlx5_wqe_ctrl_seg) + mw_bind_size; 802 SWITCH_FALLTHROUGH; 803 804 case IBV_QPT_XRC_RECV: 805 size = max(size, sizeof(struct mlx5_wqe_ctrl_seg) + 806 sizeof(struct mlx5_wqe_xrc_seg) + 807 sizeof(struct mlx5_wqe_raddr_seg)); 808 break; 809 810 case IBV_QPT_RAW_PACKET: 811 size = sizeof(struct mlx5_wqe_ctrl_seg) + 812 sizeof(struct mlx5_wqe_eth_seg); 813 break; 814 815 default: 816 return -EINVAL; 817 } 818 819 return size; 820 } 821 822 static int mlx5_calc_send_wqe(struct mlx5_context *ctx, 823 struct ibv_qp_init_attr_ex *attr, 824 struct mlx5_qp *qp) 825 { 826 int size; 827 int inl_size = 0; 828 int max_gather; 829 int tot_size; 830 831 size = sq_overhead(attr->qp_type); 832 if (size < 0) 833 return size; 834 835 if (attr->cap.max_inline_data) { 836 inl_size = size + align(sizeof(struct mlx5_wqe_inl_data_seg) + 837 attr->cap.max_inline_data, 16); 838 } 839 840 if (attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) { 841 size += align(attr->max_tso_header, 16); 842 qp->max_tso_header = attr->max_tso_header; 843 } 844 845 max_gather = (ctx->max_sq_desc_sz - size) / 846 sizeof(struct mlx5_wqe_data_seg); 847 if (attr->cap.max_send_sge > max_gather) 848 return -EINVAL; 849 850 size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); 851 tot_size = max_int(size, inl_size); 852 853 if (tot_size > ctx->max_sq_desc_sz) 854 return -EINVAL; 855 856 return align(tot_size, MLX5_SEND_WQE_BB); 857 } 858 859 static int mlx5_calc_rcv_wqe(struct mlx5_context *ctx, 860 struct ibv_qp_init_attr_ex *attr, 861 struct mlx5_qp *qp) 862 { 863 uint32_t size; 864 int num_scatter; 865 866 if (attr->srq) 867 return 0; 868 869 num_scatter = max_t(uint32_t, attr->cap.max_recv_sge, 1); 870 size = sizeof(struct mlx5_wqe_data_seg) * num_scatter; 871 if (qp->wq_sig) 872 size += sizeof(struct mlx5_rwqe_sig); 873 874 if (size > ctx->max_rq_desc_sz) 875 return -EINVAL; 876 877 size = mlx5_round_up_power_of_two(size); 878 879 return size; 880 } 881 882 static int mlx5_calc_sq_size(struct mlx5_context *ctx, 883 struct ibv_qp_init_attr_ex *attr, 884 struct mlx5_qp *qp) 885 { 886 int wqe_size; 887 int wq_size; 888 FILE *fp = ctx->dbg_fp; 889 890 if (!attr->cap.max_send_wr) 891 return 0; 892 893 wqe_size = mlx5_calc_send_wqe(ctx, attr, qp); 894 if (wqe_size < 0) { 895 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 896 return wqe_size; 897 } 898 899 if (wqe_size > ctx->max_sq_desc_sz) { 900 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 901 return -EINVAL; 902 } 903 904 qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - 905 sizeof(struct mlx5_wqe_inl_data_seg); 906 attr->cap.max_inline_data = qp->max_inline_data; 907 908 /* 909 * to avoid overflow, we limit max_send_wr so 910 * that the multiplication will fit in int 911 */ 912 if (attr->cap.max_send_wr > 0x7fffffff / ctx->max_sq_desc_sz) { 913 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 914 return -EINVAL; 915 } 916 917 wq_size = mlx5_round_up_power_of_two(attr->cap.max_send_wr * wqe_size); 918 qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; 919 if (qp->sq.wqe_cnt > ctx->max_send_wqebb) { 920 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 921 return -EINVAL; 922 } 923 924 qp->sq.wqe_shift = mlx5_ilog2(MLX5_SEND_WQE_BB); 925 qp->sq.max_gs = attr->cap.max_send_sge; 926 qp->sq.max_post = wq_size / wqe_size; 927 928 return wq_size; 929 } 930 931 static int mlx5_calc_rwq_size(struct mlx5_context *ctx, 932 struct mlx5_rwq *rwq, 933 struct ibv_wq_init_attr *attr) 934 { 935 size_t wqe_size; 936 int wq_size; 937 uint32_t num_scatter; 938 int scat_spc; 939 940 if (!attr->max_wr) 941 return -EINVAL; 942 943 /* TBD: check caps 
for RQ */ 944 num_scatter = max_t(uint32_t, attr->max_sge, 1); 945 wqe_size = sizeof(struct mlx5_wqe_data_seg) * num_scatter; 946 947 if (rwq->wq_sig) 948 wqe_size += sizeof(struct mlx5_rwqe_sig); 949 950 if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz) 951 return -EINVAL; 952 953 wqe_size = mlx5_round_up_power_of_two(wqe_size); 954 wq_size = mlx5_round_up_power_of_two(attr->max_wr) * wqe_size; 955 wq_size = max(wq_size, MLX5_SEND_WQE_BB); 956 rwq->rq.wqe_cnt = wq_size / wqe_size; 957 rwq->rq.wqe_shift = mlx5_ilog2(wqe_size); 958 rwq->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size); 959 scat_spc = wqe_size - 960 ((rwq->wq_sig) ? sizeof(struct mlx5_rwqe_sig) : 0); 961 rwq->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg); 962 return wq_size; 963 } 964 965 static int mlx5_calc_rq_size(struct mlx5_context *ctx, 966 struct ibv_qp_init_attr_ex *attr, 967 struct mlx5_qp *qp) 968 { 969 int wqe_size; 970 int wq_size; 971 int scat_spc; 972 FILE *fp = ctx->dbg_fp; 973 974 if (!attr->cap.max_recv_wr) 975 return 0; 976 977 if (attr->cap.max_recv_wr > ctx->max_recv_wr) { 978 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 979 return -EINVAL; 980 } 981 982 wqe_size = mlx5_calc_rcv_wqe(ctx, attr, qp); 983 if (wqe_size < 0 || wqe_size > ctx->max_rq_desc_sz) { 984 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 985 return -EINVAL; 986 } 987 988 wq_size = mlx5_round_up_power_of_two(attr->cap.max_recv_wr) * wqe_size; 989 if (wqe_size) { 990 wq_size = max(wq_size, MLX5_SEND_WQE_BB); 991 qp->rq.wqe_cnt = wq_size / wqe_size; 992 qp->rq.wqe_shift = mlx5_ilog2(wqe_size); 993 qp->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size); 994 scat_spc = wqe_size - 995 (qp->wq_sig ? sizeof(struct mlx5_rwqe_sig) : 0); 996 qp->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg); 997 } else { 998 qp->rq.wqe_cnt = 0; 999 qp->rq.wqe_shift = 0; 1000 qp->rq.max_post = 0; 1001 qp->rq.max_gs = 0; 1002 } 1003 return wq_size; 1004 } 1005 1006 static int mlx5_calc_wq_size(struct mlx5_context *ctx, 1007 struct ibv_qp_init_attr_ex *attr, 1008 struct mlx5_qp *qp) 1009 { 1010 int ret; 1011 int result; 1012 1013 ret = mlx5_calc_sq_size(ctx, attr, qp); 1014 if (ret < 0) 1015 return ret; 1016 1017 result = ret; 1018 ret = mlx5_calc_rq_size(ctx, attr, qp); 1019 if (ret < 0) 1020 return ret; 1021 1022 result += ret; 1023 1024 qp->sq.offset = ret; 1025 qp->rq.offset = 0; 1026 1027 return result; 1028 } 1029 1030 static void map_uuar(struct ibv_context *context, struct mlx5_qp *qp, 1031 int uuar_index) 1032 { 1033 struct mlx5_context *ctx = to_mctx(context); 1034 1035 qp->bf = &ctx->bfs[uuar_index]; 1036 } 1037 1038 static const char *qptype2key(enum ibv_qp_type type) 1039 { 1040 switch (type) { 1041 case IBV_QPT_RC: return "HUGE_RC"; 1042 case IBV_QPT_UC: return "HUGE_UC"; 1043 case IBV_QPT_UD: return "HUGE_UD"; 1044 case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH"; 1045 default: return "HUGE_NA"; 1046 } 1047 } 1048 1049 static int mlx5_alloc_qp_buf(struct ibv_context *context, 1050 struct ibv_qp_init_attr_ex *attr, 1051 struct mlx5_qp *qp, 1052 int size) 1053 { 1054 int err; 1055 enum mlx5_alloc_type alloc_type; 1056 enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_ANON; 1057 const char *qp_huge_key; 1058 1059 if (qp->sq.wqe_cnt) { 1060 qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid)); 1061 if (!qp->sq.wrid) { 1062 errno = ENOMEM; 1063 err = -1; 1064 return err; 1065 } 1066 1067 qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data)); 1068 if (!qp->sq.wr_data) { 1069 errno = ENOMEM; 1070 err = -1; 1071 goto ex_wrid; 1072 } 1073 } 
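	/*
	 * The send queue keeps per-WQE bookkeeping arrays (wrid, wr_data,
	 * wqe_head), each sized by the number of send WQEs; they are used
	 * when completions are processed to map a completion back to the
	 * caller's work request.
	 */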
1074 1075 qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head)); 1076 if (!qp->sq.wqe_head) { 1077 errno = ENOMEM; 1078 err = -1; 1079 goto ex_wrid; 1080 } 1081 1082 if (qp->rq.wqe_cnt) { 1083 qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t)); 1084 if (!qp->rq.wrid) { 1085 errno = ENOMEM; 1086 err = -1; 1087 goto ex_wrid; 1088 } 1089 } 1090 1091 /* compatibility support */ 1092 qp_huge_key = qptype2key(qp->ibv_qp->qp_type); 1093 if (mlx5_use_huge(qp_huge_key)) 1094 default_alloc_type = MLX5_ALLOC_TYPE_HUGE; 1095 1096 mlx5_get_alloc_type(MLX5_QP_PREFIX, &alloc_type, 1097 default_alloc_type); 1098 1099 err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->buf, 1100 align(qp->buf_size, to_mdev 1101 (context->device)->page_size), 1102 to_mdev(context->device)->page_size, 1103 alloc_type, 1104 MLX5_QP_PREFIX); 1105 1106 if (err) { 1107 err = -ENOMEM; 1108 goto ex_wrid; 1109 } 1110 1111 memset(qp->buf.buf, 0, qp->buf_size); 1112 1113 if (attr->qp_type == IBV_QPT_RAW_PACKET) { 1114 size_t aligned_sq_buf_size = align(qp->sq_buf_size, 1115 to_mdev(context->device)->page_size); 1116 /* For Raw Packet QP, allocate a separate buffer for the SQ */ 1117 err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->sq_buf, 1118 aligned_sq_buf_size, 1119 to_mdev(context->device)->page_size, 1120 alloc_type, 1121 MLX5_QP_PREFIX); 1122 if (err) { 1123 err = -ENOMEM; 1124 goto rq_buf; 1125 } 1126 1127 memset(qp->sq_buf.buf, 0, aligned_sq_buf_size); 1128 } 1129 1130 return 0; 1131 rq_buf: 1132 mlx5_free_actual_buf(to_mctx(qp->verbs_qp.qp.context), &qp->buf); 1133 ex_wrid: 1134 if (qp->rq.wrid) 1135 free(qp->rq.wrid); 1136 1137 if (qp->sq.wqe_head) 1138 free(qp->sq.wqe_head); 1139 1140 if (qp->sq.wr_data) 1141 free(qp->sq.wr_data); 1142 if (qp->sq.wrid) 1143 free(qp->sq.wrid); 1144 1145 return err; 1146 } 1147 1148 static void mlx5_free_qp_buf(struct mlx5_qp *qp) 1149 { 1150 struct mlx5_context *ctx = to_mctx(qp->ibv_qp->context); 1151 1152 mlx5_free_actual_buf(ctx, &qp->buf); 1153 1154 if (qp->sq_buf.buf) 1155 mlx5_free_actual_buf(ctx, &qp->sq_buf); 1156 1157 if (qp->rq.wrid) 1158 free(qp->rq.wrid); 1159 1160 if (qp->sq.wqe_head) 1161 free(qp->sq.wqe_head); 1162 1163 if (qp->sq.wrid) 1164 free(qp->sq.wrid); 1165 1166 if (qp->sq.wr_data) 1167 free(qp->sq.wr_data); 1168 } 1169 1170 static int mlx5_cmd_create_rss_qp(struct ibv_context *context, 1171 struct ibv_qp_init_attr_ex *attr, 1172 struct mlx5_qp *qp) 1173 { 1174 struct mlx5_create_qp_ex_rss cmd_ex_rss = {}; 1175 struct mlx5_create_qp_resp_ex resp = {}; 1176 int ret; 1177 1178 if (attr->rx_hash_conf.rx_hash_key_len > sizeof(cmd_ex_rss.rx_hash_key)) { 1179 errno = EINVAL; 1180 return errno; 1181 } 1182 1183 cmd_ex_rss.rx_hash_fields_mask = attr->rx_hash_conf.rx_hash_fields_mask; 1184 cmd_ex_rss.rx_hash_function = attr->rx_hash_conf.rx_hash_function; 1185 cmd_ex_rss.rx_key_len = attr->rx_hash_conf.rx_hash_key_len; 1186 memcpy(cmd_ex_rss.rx_hash_key, attr->rx_hash_conf.rx_hash_key, 1187 attr->rx_hash_conf.rx_hash_key_len); 1188 1189 ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp, 1190 sizeof(qp->verbs_qp), attr, 1191 &cmd_ex_rss.ibv_cmd, sizeof(cmd_ex_rss.ibv_cmd), 1192 sizeof(cmd_ex_rss), &resp.ibv_resp, 1193 sizeof(resp.ibv_resp), sizeof(resp)); 1194 if (ret) 1195 return ret; 1196 1197 qp->rss_qp = 1; 1198 return 0; 1199 } 1200 1201 static int mlx5_cmd_create_qp_ex(struct ibv_context *context, 1202 struct ibv_qp_init_attr_ex *attr, 1203 struct mlx5_create_qp *cmd, 1204 struct mlx5_qp *qp, 1205 struct mlx5_create_qp_resp_ex *resp) 1206 { 1207 
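	/*
	 * Build the extended create-QP command from the legacy layout: the
	 * verbs base fields (user_handle through is_srq) and the
	 * driver-specific tail (buf_addr through sq_buf_addr) are copied
	 * into the _ex structures below using offset arithmetic, so the two
	 * command formats stay in sync.
	 */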
struct mlx5_create_qp_ex cmd_ex; 1208 int ret; 1209 1210 memset(&cmd_ex, 0, sizeof(cmd_ex)); 1211 memcpy(&cmd_ex.ibv_cmd.base, &cmd->ibv_cmd.user_handle, 1212 offsetof(typeof(cmd->ibv_cmd), is_srq) + 1213 sizeof(cmd->ibv_cmd.is_srq) - 1214 offsetof(typeof(cmd->ibv_cmd), user_handle)); 1215 1216 memcpy(&cmd_ex.drv_ex, &cmd->buf_addr, 1217 offsetof(typeof(*cmd), sq_buf_addr) + 1218 sizeof(cmd->sq_buf_addr) - sizeof(cmd->ibv_cmd)); 1219 1220 ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp, 1221 sizeof(qp->verbs_qp), attr, 1222 &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd), 1223 sizeof(cmd_ex), &resp->ibv_resp, 1224 sizeof(resp->ibv_resp), sizeof(*resp)); 1225 1226 return ret; 1227 } 1228 1229 enum { 1230 MLX5_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD | 1231 IBV_QP_INIT_ATTR_XRCD | 1232 IBV_QP_INIT_ATTR_CREATE_FLAGS | 1233 IBV_QP_INIT_ATTR_MAX_TSO_HEADER | 1234 IBV_QP_INIT_ATTR_IND_TABLE | 1235 IBV_QP_INIT_ATTR_RX_HASH), 1236 }; 1237 1238 enum { 1239 MLX5_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS | 1240 IBV_QP_INIT_ATTR_MAX_TSO_HEADER | 1241 IBV_QP_INIT_ATTR_IND_TABLE | 1242 IBV_QP_INIT_ATTR_RX_HASH), 1243 }; 1244 1245 static struct ibv_qp *create_qp(struct ibv_context *context, 1246 struct ibv_qp_init_attr_ex *attr) 1247 { 1248 struct mlx5_create_qp cmd; 1249 struct mlx5_create_qp_resp resp; 1250 struct mlx5_create_qp_resp_ex resp_ex; 1251 struct mlx5_qp *qp; 1252 int ret; 1253 struct mlx5_context *ctx = to_mctx(context); 1254 struct ibv_qp *ibqp; 1255 int32_t usr_idx = 0; 1256 uint32_t uuar_index; 1257 FILE *fp = ctx->dbg_fp; 1258 1259 if (attr->comp_mask & ~MLX5_CREATE_QP_SUP_COMP_MASK) 1260 return NULL; 1261 1262 if ((attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) && 1263 (attr->qp_type != IBV_QPT_RAW_PACKET)) 1264 return NULL; 1265 1266 qp = calloc(1, sizeof(*qp)); 1267 if (!qp) { 1268 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 1269 return NULL; 1270 } 1271 ibqp = (struct ibv_qp *)&qp->verbs_qp; 1272 qp->ibv_qp = ibqp; 1273 1274 memset(&cmd, 0, sizeof(cmd)); 1275 memset(&resp, 0, sizeof(resp)); 1276 memset(&resp_ex, 0, sizeof(resp_ex)); 1277 1278 if (attr->comp_mask & IBV_QP_INIT_ATTR_RX_HASH) { 1279 ret = mlx5_cmd_create_rss_qp(context, attr, qp); 1280 if (ret) 1281 goto err; 1282 1283 return ibqp; 1284 } 1285 1286 qp->wq_sig = qp_sig_enabled(); 1287 if (qp->wq_sig) 1288 cmd.flags |= MLX5_QP_FLAG_SIGNATURE; 1289 1290 if (use_scatter_to_cqe()) 1291 cmd.flags |= MLX5_QP_FLAG_SCATTER_CQE; 1292 1293 ret = mlx5_calc_wq_size(ctx, attr, qp); 1294 if (ret < 0) { 1295 errno = -ret; 1296 goto err; 1297 } 1298 1299 if (attr->qp_type == IBV_QPT_RAW_PACKET) { 1300 qp->buf_size = qp->sq.offset; 1301 qp->sq_buf_size = ret - qp->buf_size; 1302 qp->sq.offset = 0; 1303 } else { 1304 qp->buf_size = ret; 1305 qp->sq_buf_size = 0; 1306 } 1307 1308 if (mlx5_alloc_qp_buf(context, attr, qp, ret)) { 1309 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 1310 goto err; 1311 } 1312 1313 if (attr->qp_type == IBV_QPT_RAW_PACKET) { 1314 qp->sq_start = qp->sq_buf.buf; 1315 qp->sq.qend = qp->sq_buf.buf + 1316 (qp->sq.wqe_cnt << qp->sq.wqe_shift); 1317 } else { 1318 qp->sq_start = qp->buf.buf + qp->sq.offset; 1319 qp->sq.qend = qp->buf.buf + qp->sq.offset + 1320 (qp->sq.wqe_cnt << qp->sq.wqe_shift); 1321 } 1322 1323 mlx5_init_qp_indices(qp); 1324 1325 if (mlx5_spinlock_init(&qp->sq.lock)) 1326 goto err_free_qp_buf; 1327 1328 if (mlx5_spinlock_init(&qp->rq.lock)) 1329 goto err_sq_spl; 1330 1331 qp->db = mlx5_alloc_dbrec(ctx); 1332 if (!qp->db) { 1333 mlx5_dbg(fp, MLX5_DBG_QP, "\n"); 1334 goto err_rq_spl; 1335 } 1336 1337 
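	/*
	 * The doorbell record holds the receive and send counters sampled by
	 * the hardware; both slots are zeroed here and the record's address
	 * is passed to the kernel in cmd.db_addr below.
	 */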
qp->db[MLX5_RCV_DBR] = 0; 1338 qp->db[MLX5_SND_DBR] = 0; 1339 1340 cmd.buf_addr = (uintptr_t) qp->buf.buf; 1341 cmd.sq_buf_addr = (attr->qp_type == IBV_QPT_RAW_PACKET) ? 1342 (uintptr_t) qp->sq_buf.buf : 0; 1343 cmd.db_addr = (uintptr_t) qp->db; 1344 cmd.sq_wqe_count = qp->sq.wqe_cnt; 1345 cmd.rq_wqe_count = qp->rq.wqe_cnt; 1346 cmd.rq_wqe_shift = qp->rq.wqe_shift; 1347 1348 if (ctx->atomic_cap == IBV_ATOMIC_HCA) 1349 qp->atomics_enabled = 1; 1350 1351 if (!ctx->cqe_version) { 1352 cmd.uidx = 0xffffff; 1353 pthread_mutex_lock(&ctx->qp_table_mutex); 1354 } else if (!is_xrc_tgt(attr->qp_type)) { 1355 usr_idx = mlx5_store_uidx(ctx, qp); 1356 if (usr_idx < 0) { 1357 mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n"); 1358 goto err_rq_db; 1359 } 1360 1361 cmd.uidx = usr_idx; 1362 } 1363 1364 if (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK) 1365 ret = mlx5_cmd_create_qp_ex(context, attr, &cmd, qp, &resp_ex); 1366 else 1367 ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, sizeof(qp->verbs_qp), 1368 attr, &cmd.ibv_cmd, sizeof(cmd), 1369 &resp.ibv_resp, sizeof(resp)); 1370 if (ret) { 1371 mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); 1372 goto err_free_uidx; 1373 } 1374 1375 uuar_index = (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK) ? 1376 resp_ex.uuar_index : resp.uuar_index; 1377 if (!ctx->cqe_version) { 1378 if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) { 1379 ret = mlx5_store_qp(ctx, ibqp->qp_num, qp); 1380 if (ret) { 1381 mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); 1382 goto err_destroy; 1383 } 1384 } 1385 1386 pthread_mutex_unlock(&ctx->qp_table_mutex); 1387 } 1388 1389 map_uuar(context, qp, uuar_index); 1390 1391 qp->rq.max_post = qp->rq.wqe_cnt; 1392 if (attr->sq_sig_all) 1393 qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; 1394 else 1395 qp->sq_signal_bits = 0; 1396 1397 attr->cap.max_send_wr = qp->sq.max_post; 1398 attr->cap.max_recv_wr = qp->rq.max_post; 1399 attr->cap.max_recv_sge = qp->rq.max_gs; 1400 1401 qp->rsc.type = MLX5_RSC_TYPE_QP; 1402 qp->rsc.rsn = (ctx->cqe_version && !is_xrc_tgt(attr->qp_type)) ? 
1403 usr_idx : ibqp->qp_num; 1404 1405 return ibqp; 1406 1407 err_destroy: 1408 ibv_cmd_destroy_qp(ibqp); 1409 1410 err_free_uidx: 1411 if (!ctx->cqe_version) 1412 pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex); 1413 else if (!is_xrc_tgt(attr->qp_type)) 1414 mlx5_clear_uidx(ctx, usr_idx); 1415 1416 err_rq_db: 1417 mlx5_free_db(to_mctx(context), qp->db); 1418 1419 err_rq_spl: 1420 mlx5_spinlock_destroy(&qp->rq.lock); 1421 1422 err_sq_spl: 1423 mlx5_spinlock_destroy(&qp->sq.lock); 1424 1425 err_free_qp_buf: 1426 mlx5_free_qp_buf(qp); 1427 1428 err: 1429 free(qp); 1430 1431 return NULL; 1432 } 1433 1434 struct ibv_qp *mlx5_create_qp(struct ibv_pd *pd, 1435 struct ibv_qp_init_attr *attr) 1436 { 1437 struct ibv_qp *qp; 1438 struct ibv_qp_init_attr_ex attrx; 1439 1440 memset(&attrx, 0, sizeof(attrx)); 1441 memcpy(&attrx, attr, sizeof(*attr)); 1442 attrx.comp_mask = IBV_QP_INIT_ATTR_PD; 1443 attrx.pd = pd; 1444 qp = create_qp(pd->context, &attrx); 1445 if (qp) 1446 memcpy(attr, &attrx, sizeof(*attr)); 1447 1448 return qp; 1449 } 1450 1451 static void mlx5_lock_cqs(struct ibv_qp *qp) 1452 { 1453 struct mlx5_cq *send_cq = to_mcq(qp->send_cq); 1454 struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq); 1455 1456 if (send_cq && recv_cq) { 1457 if (send_cq == recv_cq) { 1458 mlx5_spin_lock(&send_cq->lock); 1459 } else if (send_cq->cqn < recv_cq->cqn) { 1460 mlx5_spin_lock(&send_cq->lock); 1461 mlx5_spin_lock(&recv_cq->lock); 1462 } else { 1463 mlx5_spin_lock(&recv_cq->lock); 1464 mlx5_spin_lock(&send_cq->lock); 1465 } 1466 } else if (send_cq) { 1467 mlx5_spin_lock(&send_cq->lock); 1468 } else if (recv_cq) { 1469 mlx5_spin_lock(&recv_cq->lock); 1470 } 1471 } 1472 1473 static void mlx5_unlock_cqs(struct ibv_qp *qp) 1474 { 1475 struct mlx5_cq *send_cq = to_mcq(qp->send_cq); 1476 struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq); 1477 1478 if (send_cq && recv_cq) { 1479 if (send_cq == recv_cq) { 1480 mlx5_spin_unlock(&send_cq->lock); 1481 } else if (send_cq->cqn < recv_cq->cqn) { 1482 mlx5_spin_unlock(&recv_cq->lock); 1483 mlx5_spin_unlock(&send_cq->lock); 1484 } else { 1485 mlx5_spin_unlock(&send_cq->lock); 1486 mlx5_spin_unlock(&recv_cq->lock); 1487 } 1488 } else if (send_cq) { 1489 mlx5_spin_unlock(&send_cq->lock); 1490 } else if (recv_cq) { 1491 mlx5_spin_unlock(&recv_cq->lock); 1492 } 1493 } 1494 1495 int mlx5_destroy_qp(struct ibv_qp *ibqp) 1496 { 1497 struct mlx5_qp *qp = to_mqp(ibqp); 1498 struct mlx5_context *ctx = to_mctx(ibqp->context); 1499 int ret; 1500 1501 if (qp->rss_qp) { 1502 ret = ibv_cmd_destroy_qp(ibqp); 1503 if (ret) 1504 return ret; 1505 goto free; 1506 } 1507 1508 if (!ctx->cqe_version) 1509 pthread_mutex_lock(&ctx->qp_table_mutex); 1510 1511 ret = ibv_cmd_destroy_qp(ibqp); 1512 if (ret) { 1513 if (!ctx->cqe_version) 1514 pthread_mutex_unlock(&ctx->qp_table_mutex); 1515 return ret; 1516 } 1517 1518 mlx5_lock_cqs(ibqp); 1519 1520 __mlx5_cq_clean(to_mcq(ibqp->recv_cq), qp->rsc.rsn, 1521 ibqp->srq ? 
to_msrq(ibqp->srq) : NULL); 1522 if (ibqp->send_cq != ibqp->recv_cq) 1523 __mlx5_cq_clean(to_mcq(ibqp->send_cq), qp->rsc.rsn, NULL); 1524 1525 if (!ctx->cqe_version) { 1526 if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) 1527 mlx5_clear_qp(ctx, ibqp->qp_num); 1528 } 1529 1530 mlx5_unlock_cqs(ibqp); 1531 if (!ctx->cqe_version) 1532 pthread_mutex_unlock(&ctx->qp_table_mutex); 1533 else if (!is_xrc_tgt(ibqp->qp_type)) 1534 mlx5_clear_uidx(ctx, qp->rsc.rsn); 1535 1536 mlx5_free_db(ctx, qp->db); 1537 mlx5_spinlock_destroy(&qp->rq.lock); 1538 mlx5_spinlock_destroy(&qp->sq.lock); 1539 mlx5_free_qp_buf(qp); 1540 free: 1541 free(qp); 1542 1543 return 0; 1544 } 1545 1546 int mlx5_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, 1547 int attr_mask, struct ibv_qp_init_attr *init_attr) 1548 { 1549 struct ibv_query_qp cmd; 1550 struct mlx5_qp *qp = to_mqp(ibqp); 1551 int ret; 1552 1553 if (qp->rss_qp) 1554 return ENOSYS; 1555 1556 ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); 1557 if (ret) 1558 return ret; 1559 1560 init_attr->cap.max_send_wr = qp->sq.max_post; 1561 init_attr->cap.max_send_sge = qp->sq.max_gs; 1562 init_attr->cap.max_inline_data = qp->max_inline_data; 1563 1564 attr->cap = init_attr->cap; 1565 1566 return 0; 1567 } 1568 1569 enum { 1570 MLX5_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT, 1571 }; 1572 1573 int mlx5_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, 1574 int attr_mask) 1575 { 1576 struct ibv_modify_qp cmd = {}; 1577 struct ibv_modify_qp_ex cmd_ex = {}; 1578 struct ibv_modify_qp_resp_ex resp = {}; 1579 struct mlx5_qp *mqp = to_mqp(qp); 1580 struct mlx5_context *context = to_mctx(qp->context); 1581 int ret; 1582 uint32_t *db; 1583 1584 if (mqp->rss_qp) 1585 return ENOSYS; 1586 1587 if (attr_mask & IBV_QP_PORT) { 1588 switch (qp->qp_type) { 1589 case IBV_QPT_RAW_PACKET: 1590 if (context->cached_link_layer[attr->port_num - 1] == 1591 IBV_LINK_LAYER_ETHERNET) { 1592 if (context->cached_device_cap_flags & 1593 IBV_DEVICE_RAW_IP_CSUM) 1594 mqp->qp_cap_cache |= 1595 MLX5_CSUM_SUPPORT_RAW_OVER_ETH | 1596 MLX5_RX_CSUM_VALID; 1597 1598 if (ibv_is_qpt_supported( 1599 context->cached_tso_caps.supported_qpts, 1600 IBV_QPT_RAW_PACKET)) 1601 mqp->max_tso = 1602 context->cached_tso_caps.max_tso; 1603 } 1604 break; 1605 default: 1606 break; 1607 } 1608 } 1609 1610 if (attr_mask & MLX5_MODIFY_QP_EX_ATTR_MASK) 1611 ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, 1612 &cmd_ex, 1613 sizeof(cmd_ex), sizeof(cmd_ex), 1614 &resp, 1615 sizeof(resp), sizeof(resp)); 1616 else 1617 ret = ibv_cmd_modify_qp(qp, attr, attr_mask, 1618 &cmd, sizeof(cmd)); 1619 1620 if (!ret && 1621 (attr_mask & IBV_QP_STATE) && 1622 attr->qp_state == IBV_QPS_RESET) { 1623 if (qp->recv_cq) { 1624 mlx5_cq_clean(to_mcq(qp->recv_cq), mqp->rsc.rsn, 1625 qp->srq ? to_msrq(qp->srq) : NULL); 1626 } 1627 if (qp->send_cq != qp->recv_cq && qp->send_cq) 1628 mlx5_cq_clean(to_mcq(qp->send_cq), 1629 to_mqp(qp)->rsc.rsn, NULL); 1630 1631 mlx5_init_qp_indices(mqp); 1632 db = mqp->db; 1633 db[MLX5_RCV_DBR] = 0; 1634 db[MLX5_SND_DBR] = 0; 1635 } 1636 1637 /* 1638 * When the Raw Packet QP is in INIT state, its RQ 1639 * underneath is already in RDY, which means it can 1640 * receive packets. According to the IB spec, a QP can't 1641 * receive packets until moved to RTR state. To achieve this, 1642 * for Raw Packet QPs, we update the doorbell record 1643 * once the QP is moved to RTR. 
1644 */ 1645 if (!ret && 1646 (attr_mask & IBV_QP_STATE) && 1647 attr->qp_state == IBV_QPS_RTR && 1648 qp->qp_type == IBV_QPT_RAW_PACKET) { 1649 mlx5_spin_lock(&mqp->rq.lock); 1650 mqp->db[MLX5_RCV_DBR] = htobe32(mqp->rq.head & 0xffff); 1651 mlx5_spin_unlock(&mqp->rq.lock); 1652 } 1653 1654 return ret; 1655 } 1656 1657 #define RROCE_UDP_SPORT_MIN 0xC000 1658 #define RROCE_UDP_SPORT_MAX 0xFFFF 1659 struct ibv_ah *mlx5_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) 1660 { 1661 struct mlx5_context *ctx = to_mctx(pd->context); 1662 struct ibv_port_attr port_attr; 1663 struct mlx5_ah *ah; 1664 uint32_t gid_type; 1665 uint32_t tmp; 1666 uint8_t grh; 1667 int is_eth; 1668 1669 if (attr->port_num < 1 || attr->port_num > ctx->num_ports) 1670 return NULL; 1671 1672 if (ctx->cached_link_layer[attr->port_num - 1]) { 1673 is_eth = ctx->cached_link_layer[attr->port_num - 1] == 1674 IBV_LINK_LAYER_ETHERNET; 1675 } else { 1676 if (ibv_query_port(pd->context, attr->port_num, &port_attr)) 1677 return NULL; 1678 1679 is_eth = (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET); 1680 } 1681 1682 if (unlikely((!attr->is_global) && is_eth)) { 1683 errno = EINVAL; 1684 return NULL; 1685 } 1686 1687 ah = calloc(1, sizeof *ah); 1688 if (!ah) 1689 return NULL; 1690 1691 if (is_eth) { 1692 if (ibv_query_gid_type(pd->context, attr->port_num, 1693 attr->grh.sgid_index, &gid_type)) 1694 goto err; 1695 1696 if (gid_type == IBV_GID_TYPE_ROCE_V2) 1697 ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1 1698 - RROCE_UDP_SPORT_MIN) 1699 + RROCE_UDP_SPORT_MIN); 1700 /* Since RoCE packets must contain GRH, this bit is reserved 1701 * for RoCE and shouldn't be set. 1702 */ 1703 grh = 0; 1704 } else { 1705 ah->av.fl_mlid = attr->src_path_bits & 0x7f; 1706 ah->av.rlid = htobe16(attr->dlid); 1707 grh = 1; 1708 } 1709 ah->av.stat_rate_sl = (attr->static_rate << 4) | attr->sl; 1710 if (attr->is_global) { 1711 ah->av.tclass = attr->grh.traffic_class; 1712 ah->av.hop_limit = attr->grh.hop_limit; 1713 tmp = htobe32((grh << 30) | 1714 ((attr->grh.sgid_index & 0xff) << 20) | 1715 (attr->grh.flow_label & 0xfffff)); 1716 ah->av.grh_gid_fl = tmp; 1717 memcpy(ah->av.rgid, attr->grh.dgid.raw, 16); 1718 } 1719 1720 if (is_eth) { 1721 if (ctx->cmds_supp_uhw & MLX5_USER_CMDS_SUPP_UHW_CREATE_AH) { 1722 struct mlx5_create_ah_resp resp = {}; 1723 1724 if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp))) 1725 goto err; 1726 1727 ah->kern_ah = true; 1728 memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE); 1729 } else { 1730 uint16_t vid; 1731 1732 if (ibv_resolve_eth_l2_from_gid(pd->context, attr, 1733 ah->av.rmac, &vid)) 1734 goto err; 1735 } 1736 } 1737 1738 return &ah->ibv_ah; 1739 err: 1740 free(ah); 1741 return NULL; 1742 } 1743 1744 int mlx5_destroy_ah(struct ibv_ah *ah) 1745 { 1746 struct mlx5_ah *mah = to_mah(ah); 1747 int err; 1748 1749 if (mah->kern_ah) { 1750 err = ibv_cmd_destroy_ah(ah); 1751 if (err) 1752 return err; 1753 } 1754 1755 free(mah); 1756 return 0; 1757 } 1758 1759 int mlx5_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) 1760 { 1761 return ibv_cmd_attach_mcast(qp, gid, lid); 1762 } 1763 1764 int mlx5_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) 1765 { 1766 return ibv_cmd_detach_mcast(qp, gid, lid); 1767 } 1768 1769 struct ibv_qp *mlx5_create_qp_ex(struct ibv_context *context, 1770 struct ibv_qp_init_attr_ex *attr) 1771 { 1772 return create_qp(context, attr); 1773 } 1774 1775 int mlx5_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num) 1776 { 
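	/*
	 * Report the SRQ number assigned by the kernel at creation time
	 * (resp.srqn); this is the number a remote XRC sender uses to
	 * address the SRQ.
	 */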
1777 struct mlx5_srq *msrq = to_msrq(srq); 1778 1779 *srq_num = msrq->srqn; 1780 1781 return 0; 1782 } 1783 1784 struct ibv_xrcd * 1785 mlx5_open_xrcd(struct ibv_context *context, 1786 struct ibv_xrcd_init_attr *xrcd_init_attr) 1787 { 1788 int err; 1789 struct verbs_xrcd *xrcd; 1790 struct ibv_open_xrcd cmd = {}; 1791 struct ibv_open_xrcd_resp resp = {}; 1792 1793 xrcd = calloc(1, sizeof(*xrcd)); 1794 if (!xrcd) 1795 return NULL; 1796 1797 err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr, 1798 &cmd, sizeof(cmd), &resp, sizeof(resp)); 1799 if (err) { 1800 free(xrcd); 1801 return NULL; 1802 } 1803 1804 return &xrcd->xrcd; 1805 } 1806 1807 int mlx5_close_xrcd(struct ibv_xrcd *ib_xrcd) 1808 { 1809 struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd); 1810 int ret; 1811 1812 ret = ibv_cmd_close_xrcd(xrcd); 1813 if (!ret) 1814 free(xrcd); 1815 1816 return ret; 1817 } 1818 1819 static struct ibv_srq * 1820 mlx5_create_xrc_srq(struct ibv_context *context, 1821 struct ibv_srq_init_attr_ex *attr) 1822 { 1823 int err; 1824 struct mlx5_create_srq_ex cmd; 1825 struct mlx5_create_srq_resp resp; 1826 struct mlx5_srq *msrq; 1827 struct mlx5_context *ctx = to_mctx(context); 1828 int max_sge; 1829 struct ibv_srq *ibsrq; 1830 int uidx; 1831 FILE *fp = ctx->dbg_fp; 1832 1833 msrq = calloc(1, sizeof(*msrq)); 1834 if (!msrq) 1835 return NULL; 1836 1837 ibsrq = (struct ibv_srq *)&msrq->vsrq; 1838 1839 memset(&cmd, 0, sizeof(cmd)); 1840 memset(&resp, 0, sizeof(resp)); 1841 1842 if (mlx5_spinlock_init(&msrq->lock)) { 1843 fprintf(stderr, "%s-%d:\n", __func__, __LINE__); 1844 goto err; 1845 } 1846 1847 if (attr->attr.max_wr > ctx->max_srq_recv_wr) { 1848 fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", 1849 __func__, __LINE__, attr->attr.max_wr, 1850 ctx->max_srq_recv_wr); 1851 errno = EINVAL; 1852 goto err_spl; 1853 } 1854 1855 /* 1856 * this calculation does not consider required control segments. The 1857 * final calculation is done again later. 
This is done to avoid
	 * overflow of variables.
	 */
	max_sge = ctx->max_recv_wr / sizeof(struct mlx5_wqe_data_seg);
	if (attr->attr.max_sge > max_sge) {
		fprintf(stderr, "%s-%d:max_sge %d, max supported %d\n",
			__func__, __LINE__, attr->attr.max_sge,
			max_sge);
		errno = EINVAL;
		goto err_spl;
	}

	msrq->max = align_queue_size(attr->attr.max_wr + 1);
	msrq->max_gs = attr->attr.max_sge;
	msrq->counter = 0;

	if (mlx5_alloc_srq_buf(context, msrq)) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err_spl;
	}

	msrq->db = mlx5_alloc_dbrec(ctx);
	if (!msrq->db) {
		fprintf(stderr, "%s-%d:\n", __func__, __LINE__);
		goto err_free;
	}

	*msrq->db = 0;

	cmd.buf_addr = (uintptr_t)msrq->buf.buf;
	cmd.db_addr = (uintptr_t)msrq->db;
	msrq->wq_sig = srq_sig_enabled();
	if (msrq->wq_sig)
		cmd.flags = MLX5_SRQ_FLAG_SIGNATURE;

	attr->attr.max_sge = msrq->max_gs;
	if (ctx->cqe_version) {
		uidx = mlx5_store_uidx(ctx, msrq);
		if (uidx < 0) {
			mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n");
			goto err_free_db;
		}
		cmd.uidx = uidx;
	} else {
		cmd.uidx = 0xffffff;
		pthread_mutex_lock(&ctx->srq_table_mutex);
	}

	err = ibv_cmd_create_srq_ex(context, &msrq->vsrq, sizeof(msrq->vsrq),
				    attr, &cmd.ibv_cmd, sizeof(cmd),
				    &resp.ibv_resp, sizeof(resp));
	if (err)
		goto err_free_uidx;

	if (!ctx->cqe_version) {
		err = mlx5_store_srq(to_mctx(context), resp.srqn, msrq);
		if (err)
			goto err_destroy;

		pthread_mutex_unlock(&ctx->srq_table_mutex);
	}

	msrq->srqn = resp.srqn;
	msrq->rsc.type = MLX5_RSC_TYPE_XSRQ;
	msrq->rsc.rsn = ctx->cqe_version ?
cmd.uidx : resp.srqn; 1922 1923 return ibsrq; 1924 1925 err_destroy: 1926 ibv_cmd_destroy_srq(ibsrq); 1927 1928 err_free_uidx: 1929 if (ctx->cqe_version) 1930 mlx5_clear_uidx(ctx, cmd.uidx); 1931 else 1932 pthread_mutex_unlock(&ctx->srq_table_mutex); 1933 1934 err_free_db: 1935 mlx5_free_db(ctx, msrq->db); 1936 1937 err_free: 1938 free(msrq->wrid); 1939 mlx5_free_buf(&msrq->buf); 1940 1941 err_spl: 1942 mlx5_spinlock_destroy(&msrq->lock); 1943 1944 err: 1945 free(msrq); 1946 1947 return NULL; 1948 } 1949 1950 struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context, 1951 struct ibv_srq_init_attr_ex *attr) 1952 { 1953 if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) || 1954 (attr->srq_type == IBV_SRQT_BASIC)) 1955 return mlx5_create_srq(attr->pd, 1956 (struct ibv_srq_init_attr *)attr); 1957 else if (attr->srq_type == IBV_SRQT_XRC) 1958 return mlx5_create_xrc_srq(context, attr); 1959 1960 return NULL; 1961 } 1962 1963 int mlx5_query_device_ex(struct ibv_context *context, 1964 const struct ibv_query_device_ex_input *input, 1965 struct ibv_device_attr_ex *attr, 1966 size_t attr_size) 1967 { 1968 struct mlx5_context *mctx = to_mctx(context); 1969 struct mlx5_query_device_ex_resp resp; 1970 struct mlx5_query_device_ex cmd; 1971 struct ibv_device_attr *a; 1972 uint64_t raw_fw_ver; 1973 unsigned sub_minor; 1974 unsigned major; 1975 unsigned minor; 1976 int err; 1977 int cmd_supp_uhw = mctx->cmds_supp_uhw & 1978 MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; 1979 1980 memset(&cmd, 0, sizeof(cmd)); 1981 memset(&resp, 0, sizeof(resp)); 1982 err = ibv_cmd_query_device_ex(context, input, attr, attr_size, 1983 &raw_fw_ver, 1984 &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), 1985 &resp.ibv_resp, sizeof(resp.ibv_resp), 1986 cmd_supp_uhw ? sizeof(resp) : sizeof(resp.ibv_resp)); 1987 if (err) 1988 return err; 1989 1990 attr->tso_caps = resp.tso_caps; 1991 attr->rss_caps.rx_hash_fields_mask = resp.rss_caps.rx_hash_fields_mask; 1992 attr->rss_caps.rx_hash_function = resp.rss_caps.rx_hash_function; 1993 attr->packet_pacing_caps = resp.packet_pacing_caps.caps; 1994 1995 if (resp.support_multi_pkt_send_wqe) 1996 mctx->vendor_cap_flags |= MLX5_VENDOR_CAP_FLAGS_MPW; 1997 1998 mctx->cqe_comp_caps = resp.cqe_comp_caps; 1999 2000 major = (raw_fw_ver >> 32) & 0xffff; 2001 minor = (raw_fw_ver >> 16) & 0xffff; 2002 sub_minor = raw_fw_ver & 0xffff; 2003 a = &attr->orig_attr; 2004 snprintf(a->fw_ver, sizeof(a->fw_ver), "%d.%d.%04d", 2005 major, minor, sub_minor); 2006 2007 return 0; 2008 } 2009 2010 static int rwq_sig_enabled(struct ibv_context *context) 2011 { 2012 char *env; 2013 2014 env = getenv("MLX5_RWQ_SIGNATURE"); 2015 if (env) 2016 return 1; 2017 2018 return 0; 2019 } 2020 2021 static void mlx5_free_rwq_buf(struct mlx5_rwq *rwq, struct ibv_context *context) 2022 { 2023 struct mlx5_context *ctx = to_mctx(context); 2024 2025 mlx5_free_actual_buf(ctx, &rwq->buf); 2026 free(rwq->rq.wrid); 2027 } 2028 2029 static int mlx5_alloc_rwq_buf(struct ibv_context *context, 2030 struct mlx5_rwq *rwq, 2031 int size) 2032 { 2033 int err; 2034 enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG; 2035 2036 rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t)); 2037 if (!rwq->rq.wrid) { 2038 errno = ENOMEM; 2039 return -1; 2040 } 2041 2042 err = mlx5_alloc_prefered_buf(to_mctx(context), &rwq->buf, 2043 align(rwq->buf_size, to_mdev 2044 (context->device)->page_size), 2045 to_mdev(context->device)->page_size, 2046 default_alloc_type, 2047 MLX5_RWQ_PREFIX); 2048 2049 if (err) { 2050 free(rwq->rq.wrid); 2051 errno = 
ENOMEM; 2052 return -1; 2053 } 2054 2055 return 0; 2056 } 2057 2058 struct ibv_wq *mlx5_create_wq(struct ibv_context *context, 2059 struct ibv_wq_init_attr *attr) 2060 { 2061 struct mlx5_create_wq cmd; 2062 struct mlx5_create_wq_resp resp; 2063 int err; 2064 struct mlx5_rwq *rwq; 2065 struct mlx5_context *ctx = to_mctx(context); 2066 int ret; 2067 int32_t usr_idx = 0; 2068 FILE *fp = ctx->dbg_fp; 2069 2070 if (attr->wq_type != IBV_WQT_RQ) 2071 return NULL; 2072 2073 memset(&cmd, 0, sizeof(cmd)); 2074 memset(&resp, 0, sizeof(resp)); 2075 2076 rwq = calloc(1, sizeof(*rwq)); 2077 if (!rwq) 2078 return NULL; 2079 2080 rwq->wq_sig = rwq_sig_enabled(context); 2081 if (rwq->wq_sig) 2082 cmd.drv.flags = MLX5_RWQ_FLAG_SIGNATURE; 2083 2084 ret = mlx5_calc_rwq_size(ctx, rwq, attr); 2085 if (ret < 0) { 2086 errno = -ret; 2087 goto err; 2088 } 2089 2090 ret = ibv_init_wq(&rwq->wq); 2091 if (ret < 0) 2092 goto err; 2093 2094 rwq->buf_size = ret; 2095 if (mlx5_alloc_rwq_buf(context, rwq, ret)) 2096 goto err_cleanup_wq; 2097 2098 mlx5_init_rwq_indices(rwq); 2099 2100 if (mlx5_spinlock_init(&rwq->rq.lock)) 2101 goto err_free_rwq_buf; 2102 2103 rwq->db = mlx5_alloc_dbrec(ctx); 2104 if (!rwq->db) 2105 goto err_spl; 2106 2107 rwq->db[MLX5_RCV_DBR] = 0; 2108 rwq->db[MLX5_SND_DBR] = 0; 2109 rwq->pbuff = rwq->buf.buf + rwq->rq.offset; 2110 rwq->recv_db = &rwq->db[MLX5_RCV_DBR]; 2111 cmd.drv.buf_addr = (uintptr_t)rwq->buf.buf; 2112 cmd.drv.db_addr = (uintptr_t)rwq->db; 2113 cmd.drv.rq_wqe_count = rwq->rq.wqe_cnt; 2114 cmd.drv.rq_wqe_shift = rwq->rq.wqe_shift; 2115 usr_idx = mlx5_store_uidx(ctx, rwq); 2116 if (usr_idx < 0) { 2117 mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n"); 2118 goto err_free_db_rec; 2119 } 2120 2121 cmd.drv.user_index = usr_idx; 2122 err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd, 2123 sizeof(cmd.ibv_cmd), 2124 sizeof(cmd), 2125 &resp.ibv_resp, sizeof(resp.ibv_resp), 2126 sizeof(resp)); 2127 if (err) 2128 goto err_create; 2129 2130 rwq->rsc.type = MLX5_RSC_TYPE_RWQ; 2131 rwq->rsc.rsn = cmd.drv.user_index; 2132 2133 rwq->wq.post_recv = mlx5_post_wq_recv; 2134 return &rwq->wq; 2135 2136 err_create: 2137 mlx5_clear_uidx(ctx, cmd.drv.user_index); 2138 err_free_db_rec: 2139 mlx5_free_db(to_mctx(context), rwq->db); 2140 err_spl: 2141 mlx5_spinlock_destroy(&rwq->rq.lock); 2142 err_free_rwq_buf: 2143 mlx5_free_rwq_buf(rwq, context); 2144 err_cleanup_wq: 2145 ibv_cleanup_wq(&rwq->wq); 2146 err: 2147 free(rwq); 2148 return NULL; 2149 } 2150 2151 int mlx5_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr) 2152 { 2153 struct mlx5_modify_wq cmd = {}; 2154 struct mlx5_rwq *rwq = to_mrwq(wq); 2155 2156 if ((attr->attr_mask & IBV_WQ_ATTR_STATE) && 2157 attr->wq_state == IBV_WQS_RDY) { 2158 if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) && 2159 attr->curr_wq_state != wq->state) 2160 return -EINVAL; 2161 2162 if (wq->state == IBV_WQS_RESET) { 2163 mlx5_spin_lock(&to_mcq(wq->cq)->lock); 2164 __mlx5_cq_clean(to_mcq(wq->cq), 2165 rwq->rsc.rsn, NULL); 2166 mlx5_spin_unlock(&to_mcq(wq->cq)->lock); 2167 mlx5_init_rwq_indices(rwq); 2168 rwq->db[MLX5_RCV_DBR] = 0; 2169 rwq->db[MLX5_SND_DBR] = 0; 2170 } 2171 } 2172 2173 return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd)); 2174 } 2175 2176 int mlx5_destroy_wq(struct ibv_wq *wq) 2177 { 2178 struct mlx5_rwq *rwq = to_mrwq(wq); 2179 int ret; 2180 2181 ret = ibv_cmd_destroy_wq(wq); 2182 if (ret) 2183 return ret; 2184 2185 mlx5_spin_lock(&to_mcq(wq->cq)->lock); 2186 __mlx5_cq_clean(to_mcq(wq->cq), rwq->rsc.rsn, 
NULL); 2187 mlx5_spin_unlock(&to_mcq(wq->cq)->lock); 2188 mlx5_clear_uidx(to_mctx(wq->context), rwq->rsc.rsn); 2189 mlx5_free_db(to_mctx(wq->context), rwq->db); 2190 mlx5_spinlock_destroy(&rwq->rq.lock); 2191 mlx5_free_rwq_buf(rwq, wq->context); 2192 ibv_cleanup_wq(&rwq->wq); 2193 free(rwq); 2194 2195 return 0; 2196 } 2197 2198 struct ibv_rwq_ind_table *mlx5_create_rwq_ind_table(struct ibv_context *context, 2199 struct ibv_rwq_ind_table_init_attr *init_attr) 2200 { 2201 struct ibv_create_rwq_ind_table *cmd; 2202 struct mlx5_create_rwq_ind_table_resp resp; 2203 struct ibv_rwq_ind_table *ind_table; 2204 uint32_t required_tbl_size; 2205 int num_tbl_entries; 2206 int cmd_size; 2207 int err; 2208 2209 num_tbl_entries = 1 << init_attr->log_ind_tbl_size; 2210 /* Data must be u64 aligned */ 2211 required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ? 2212 sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t)); 2213 2214 cmd_size = required_tbl_size + sizeof(*cmd); 2215 cmd = calloc(1, cmd_size); 2216 if (!cmd) 2217 return NULL; 2218 2219 memset(&resp, 0, sizeof(resp)); 2220 ind_table = calloc(1, sizeof(*ind_table)); 2221 if (!ind_table) 2222 goto free_cmd; 2223 2224 err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table, cmd, 2225 cmd_size, cmd_size, &resp.ibv_resp, sizeof(resp.ibv_resp), 2226 sizeof(resp)); 2227 if (err) 2228 goto err; 2229 2230 free(cmd); 2231 return ind_table; 2232 2233 err: 2234 free(ind_table); 2235 free_cmd: 2236 free(cmd); 2237 return NULL; 2238 } 2239 2240 int mlx5_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) 2241 { 2242 int ret; 2243 2244 ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table); 2245 2246 if (ret) 2247 return ret; 2248 2249 free(rwq_ind_table); 2250 return 0; 2251 } 2252
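
/*
 * Illustrative sketch (not part of the provider): the entry points above are
 * reached through the generic libibverbs API. A minimal consumer-side flow,
 * assuming an already opened context "ctx" and omitting error handling,
 * looks roughly like this:
 *
 *	struct ibv_pd *pd = ibv_alloc_pd(ctx);                  // mlx5_alloc_pd()
 *	char *buf = calloc(1, 4096);
 *	struct ibv_mr *mr = ibv_reg_mr(pd, buf, 4096,
 *				       IBV_ACCESS_LOCAL_WRITE); // mlx5_reg_mr()
 *	struct ibv_cq *cq = ibv_create_cq(ctx, 256, NULL, NULL, 0); // mlx5_create_cq()
 *	struct ibv_qp_init_attr attr = {
 *		.send_cq = cq, .recv_cq = cq,
 *		.cap = { .max_send_wr = 16, .max_recv_wr = 16,
 *			 .max_send_sge = 1, .max_recv_sge = 1 },
 *		.qp_type = IBV_QPT_RC,
 *	};
 *	struct ibv_qp *qp = ibv_create_qp(pd, &attr);           // mlx5_create_qp()
 */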